/*
 * VP7/VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Fiona Glaser
 * Copyright (C) 2012 Daniel Kang
 * Copyright (C) 2014 Peter Ross
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "libavutil/imgutils.h"

#include "rectangle.h"
#if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
#define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
#elif CONFIG_VP7_DECODER
#define VPX(vp7, f) vp7_ ## f
#else // CONFIG_VP8_DECODER
#define VPX(vp7, f) vp8_ ## f
#endif
static void free_buffers(VP8Context *s)
{
    int i;
    if (s->thread_data)
        for (i = 0; i < MAX_THREADS; i++) {
#if HAVE_THREADS
            pthread_cond_destroy(&s->thread_data[i].cond);
            pthread_mutex_destroy(&s->thread_data[i].lock);
#endif
            av_freep(&s->thread_data[i].filter_strength);
        }
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}
static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    int ret;
    if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                    ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
        return ret;
    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height)))
        goto fail;
    if (s->avctx->hwaccel) {
        const AVHWAccel *hwaccel = s->avctx->hwaccel;
        if (hwaccel->frame_priv_data_size) {
            f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
            if (!f->hwaccel_priv_buf)
                goto fail;
            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
        }
    }
    return 0;

fail:
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
    return AVERROR(ENOMEM);
}
static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    av_buffer_unref(&f->seg_map);
    av_buffer_unref(&f->hwaccel_priv_buf);
    f->hwaccel_picture_private = NULL;
    ff_thread_release_buffer(s->avctx, &f->tf);
}
#if CONFIG_VP8_DECODER
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;
    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }
    if (src->hwaccel_picture_private) {
        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
        if (!dst->hwaccel_priv_buf)
            return AVERROR(ENOMEM);
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
    }

    return 0;
}
#endif /* CONFIG_VP8_DECODER */
static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(s, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem)
        free_buffers(s);
}

static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0);
}
static VP8Frame *vp8_find_free_buffer(VP8Context *s)
{
    VP8Frame *frame = NULL;
    int i;

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT]  &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            frame = &s->frames[i];
            break;
        }
    if (i == 5) {
        av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (frame->tf.f->buf[0])
        vp8_release_frame(s, frame);

    return frame;
}
static av_always_inline
int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
{
    AVCodecContext *avctx = s->avctx;
    int i, ret;

    if (width  != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
        height != s->avctx->height) {
        vp8_decode_flush_impl(s->avctx, 1);

        ret = ff_set_dimensions(s->avctx, width, height);
        if (ret < 0)
            return ret;
    }

    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
    s->mb_height = (s->avctx->coded_height + 15) / 16;

    s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
                   avctx->thread_count > 1;
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
                                               sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
    } else // Sliced threading
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
                                         sizeof(*s->macroblocks));
    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));

    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
        free_buffers(s);
        return AVERROR(ENOMEM);
    }

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength =
            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
        if (!s->thread_data[i].filter_strength) {
            free_buffers(s);
            return AVERROR(ENOMEM);
        }
#if HAVE_THREADS
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
    }

    s->macroblocks = s->macroblocks_base + 1;

    return 0;
}
static int vp7_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP7);
}

static int vp8_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP8);
}
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map = vp8_rac_get(c);
    s->segmentation.update_feature_data = vp8_rac_get(c);

    if (s->segmentation.update_feature_data) {
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i, ret;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    buf      += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
        int size = AV_RL24(sizes + 3 * i);
        if (buf_size - size < 0)
            return -1;
        s->coeff_partition_size[i] = size;

        ret = ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        if (ret < 0)
            return ret;
        buf      += size;
        buf_size -= size;
    }

    s->coeff_partition_size[i] = buf_size;
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}
static void vp7_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int yac_qi  = vp8_rac_get_uint(c, 7);
    int ydc_qi  = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;

    s->qmat[0].luma_qmul[0]    = vp7_ydc_qlookup[ydc_qi];
    s->qmat[0].luma_qmul[1]    = vp7_yac_qlookup[yac_qi];
    s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
    s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
    s->qmat[0].chroma_qmul[0]  = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
    s->qmat[0].chroma_qmul[1]  = vp7_yac_qlookup[uvac_qi];
}
static void vp8_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    s->quant.yac_qi     = vp8_rac_get_uint(c, 7);
    s->quant.ydc_delta  = vp8_rac_get_sint(c, 4);
    s->quant.y2dc_delta = vp8_rac_get_sint(c, 4);
    s->quant.y2ac_delta = vp8_rac_get_sint(c, 4);
    s->quant.uvdc_delta = vp8_rac_get_sint(c, 4);
    s->quant.uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += s->quant.yac_qi;
        } else
            base_qi = s->quant.yac_qi;

        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta,  7)];
        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,                       7)];
        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2;
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16;
        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)];

        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}
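
/* A worked example of the fixed-point scaling above (illustration, not part
 * of the original source): 155/100 = 1.55 and 1.55 * 65536 = 101580.8, so
 * 101581 is the nearest integer multiplier; a looked-up value of 100 becomes
 * (100 * 101581) >> 16 = 155, i.e. a 55% boost before the FFMAX clamp. */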
/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
 *
 * Intra frames update all 3 references
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
 * If the update (golden|altref) flag is set, it's updated with the current frame
 *      if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 *      0: no update
 *      1: VP56_FRAME_PREVIOUS
 *      2: update golden with altref, or update altref with golden
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;

    if (update)
        return VP56_FRAME_CURRENT;

    switch (vp8_rac_get_uint(c, 2)) {
    case 1:
        return VP56_FRAME_PREVIOUS;
    case 2:
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    }
    return VP56_FRAME_NONE;
}
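
/* For illustration (not part of the original source): on an inter frame
 * whose update_golden flag is unset, reading the value 2 here with
 * ref == VP56_FRAME_GOLDEN returns VP56_FRAME_GOLDEN2, i.e. golden is
 * refreshed from the altref contents, and symmetrically for altref. */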
static void vp78_reset_probability_tables(VP8Context *s)
{
    int i, j;
    for (i = 0; i < 4; i++)
        for (j = 0; j < 16; j++)
            memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                   sizeof(s->prob->token[i][j]));
}

static void vp78_update_probability_tables(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, j, k, l, m;

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }
}
#define VP7_MVC_SIZE 17
#define VP8_MVC_SIZE 19

static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
                                                            int mvc_size)
{
    VP56RangeCoder *c = &s->c;
    int i, j;

    if (vp8_rac_get(c))
        for (i = 0; i < 4; i++)
            s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
    if (vp8_rac_get(c))
        for (i = 0; i < 3; i++)
            s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);

    // 17.2 MV probability update
    for (i = 0; i < 2; i++)
        for (j = 0; j < mvc_size; j++)
            if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                s->prob->mvc[i][j] = vp8_rac_get_nn(c);
}
static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}
static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
{
    int i, j;

    for (j = 1; j < 3; j++) {
        for (i = 0; i < height / 2; i++)
            memcpy(dst->data[j] + i * dst->linesize[j],
                   src->data[j] + i * src->linesize[j], width / 2);
    }
}
static void fade(uint8_t *dst, ptrdiff_t dst_linesize,
                 const uint8_t *src, ptrdiff_t src_linesize,
                 int width, int height,
                 int alpha, int beta)
{
    int i, j;
    for (j = 0; j < height; j++) {
        for (i = 0; i < width; i++) {
            uint8_t y = src[j * src_linesize + i];
            dst[j * dst_linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
        }
    }
}
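
/* For illustration (not part of the original source): with alpha = 10 and
 * beta = -64, a source luma sample of 100 maps to
 * av_clip_uint8(100 + ((100 * -64) >> 8) + 10) = av_clip_uint8(85) = 85,
 * i.e. beta scales each sample and alpha adds a flat offset. */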
static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
{
    int alpha = (int8_t) vp8_rac_get_uint(c, 8);
    int beta  = (int8_t) vp8_rac_get_uint(c, 8);
    int ret;

    if (!s->keyframe && (alpha || beta)) {
        int width  = s->mb_width * 16;
        int height = s->mb_height * 16;
        AVFrame *src, *dst;

        if (!s->framep[VP56_FRAME_PREVIOUS] ||
            !s->framep[VP56_FRAME_GOLDEN]) {
            av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
            return AVERROR_INVALIDDATA;
        }

        dst =
        src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

        /* preserve the golden frame, write a new previous frame */
        if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
            s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
            if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
                return ret;

            dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

            copy_chroma(dst, src, width, height);
        }

        fade(dst->data[0], dst->linesize[0],
             src->data[0], src->linesize[0],
             width, height, alpha, beta);
    }

    return 0;
}
static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int part1_size, hscale, vscale, i, j, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    if (buf_size < 4)
        return AVERROR_INVALIDDATA;

    s->profile = (buf[0] >> 1) & 7;
    if (s->profile > 1) {
        avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe = !(buf[0] & 1);

    part1_size = AV_RL24(buf) >> 4;

    if (buf_size < 4 - s->profile + part1_size) {
        av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed: %d\n", buf_size, 4 - s->profile + part1_size);
        return AVERROR_INVALIDDATA;
    }

    buf      += 4 - s->profile;
    buf_size -= 4 - s->profile;

    memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));

    ret = ff_vp56_init_range_decoder(c, buf, part1_size);
    if (ret < 0)
        return ret;
    buf      += part1_size;
    buf_size -= part1_size;

    /* A. Dimension information (keyframes only) */
    if (s->keyframe) {
        width  = vp8_rac_get_uint(c, 12);
        height = vp8_rac_get_uint(c, 12);
        hscale = vp8_rac_get_uint(c, 2);
        vscale = vp8_rac_get_uint(c, 2);
        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        for (i = 0; i < 2; i++)
            memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
                   sizeof(vp7_mv_default_prob[i]));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
        memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
    }

    if (s->keyframe || s->profile > 0)
        memset(s->inter_dc_pred, 0, sizeof(s->inter_dc_pred));

    /* B. Decoding information for all four macroblock-level features */
    for (i = 0; i < 4; i++) {
        s->feature_enabled[i] = vp8_rac_get(c);
        if (s->feature_enabled[i]) {
            s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);

            for (j = 0; j < 3; j++)
                s->feature_index_prob[i][j] =
                    vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;

            if (vp7_feature_value_size[s->profile][i])
                for (j = 0; j < 4; j++)
                    s->feature_value[i][j] =
                        vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
        }
    }

    s->segmentation.enabled    = 0;
    s->segmentation.update_map = 0;
    s->lf_delta.enabled        = 0;

    s->num_coeff_partitions = 1;
    ret = ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
    if (ret < 0)
        return ret;

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
        if ((ret = vp7_update_dimensions(s, width, height)) < 0)
            return ret;
    }

    /* C. Dequantization indices */
    vp7_get_quants(s);

    /* D. Golden frame update flag (a Flag) for interframes only */
    if (!s->keyframe) {
        s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
        s->sign_bias[VP56_FRAME_GOLDEN] = 0;
    }

    s->update_last          = 1;
    s->update_probabilities = 1;
    s->fade_present         = 1;

    if (s->profile > 0) {
        s->update_probabilities = vp8_rac_get(c);
        if (!s->update_probabilities)
            s->prob[1] = s->prob[0];

        if (!s->keyframe)
            s->fade_present = vp8_rac_get(c);
    }

    /* E. Fading information for previous frame */
    if (s->fade_present && vp8_rac_get(c)) {
        if ((ret = vp7_fade_frame(s, c)) < 0)
            return ret;
    }

    /* F. Loop filter type */
    if (!s->profile)
        s->filter.simple = vp8_rac_get(c);

    /* G. DCT coefficient ordering specification */
    if (vp8_rac_get(c))
        for (i = 1; i < 16; i++)
            s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)];

    /* H. Loop filter levels */
    if (s->profile > 0)
        s->filter.simple = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
    vp78_update_probability_tables(s);

    s->mbskip_enabled = 0;

    /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
    if (!s->keyframe) {
        s->prob->intra = vp8_rac_get_uint(c, 8);
        s->prob->last  = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
    }

    return 0;
}
static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    if (buf_size < 3) {
        av_log(s->avctx, AV_LOG_ERROR, "Insufficient data (%d) for header\n", buf_size);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    s->header_partition_size = header_size;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
               sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
               sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7 * s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf + 3) & 0x3fff;
        height = AV_RL16(buf + 5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc, vp8_mv_default_prob,
               sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ret = ff_vp56_init_range_decoder(c, buf, header_size);
    if (ret < 0)
        return ret;
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        s->colorspace = vp8_rac_get(c);
        if (s->colorspace)
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        s->fullrange = vp8_rac_get(c);
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c))) {
        s->lf_delta.update = vp8_rac_get(c);
        if (s->lf_delta.update)
            update_lf_deltas(s);
    }

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
        if ((ret = vp8_update_dimensions(s, width, height)) < 0)
            return ret;

    vp8_get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    vp78_update_probability_tables(s);

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
    }

    // Record the entropy coder state here so that hwaccels can use it.
    s->c.code_word = vp56_rac_renorm(&s->c);
    s->coder_state_at_header_end.input     = s->c.buffer - (-s->c.bits / 8);
    s->coder_state_at_header_end.range     = s->c.high;
    s->coder_state_at_header_end.value     = s->c.code_word >> 16;
    s->coder_state_at_header_end.bit_count = -s->c.bits % 8;

    return 0;
}
static av_always_inline
void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src)
{
    dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
                     av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
    dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
                     av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));
}
/**
 * Motion vector coding, 17.1.
 */
static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int i;

        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = (vp7 ? 7 : 9); i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p + 2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3 * bit;
        x  += 4 * bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2 * bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}
static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 1);
}

static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 0);
}
static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
{
    if (is_vp7)
        return vp7_submv_prob;

    if (left == top)
        return vp8_submv_prob[4 - !!left];
    if (!top)
        return vp8_submv_prob[2];
    return vp8_submv_prob[1 - !!left];
}
/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                    int layout, int is_vp7)
{
    int part_idx;
    int n, num;
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
    VP56mv *top_mv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width - 1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv       = top_mb->bmv;

    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        else
            part_idx = VP8_SPLITMVMODE_8x8;
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num          = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx     = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above, is_vp7);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y +
                                   read_mv_component(c, s->prob->mvc[0], is_vp7);
                    mb->bmv[n].x = mb->mv.x +
                                   read_mv_component(c, s->prob->mvc[1], is_vp7);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}
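
/* For illustration (not part of the original source): vp8_mbsplit_count maps
 * the partitionings above to the advertised return values: 16x8 and 8x16
 * carry 2 motion vectors, 8x8 carries 4 and 4x4 carries 16. */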
/**
 * The vp7 reference decoder uses a padding macroblock column (added to right
 * edge of the frame) to guard against illegal macroblock offsets. The
 * algorithm has bugs that permit offsets to straddle the padding column.
 * This function replicates those bugs.
 *
 * @param[out] edge_x macroblock x address
 * @param[out] edge_y macroblock y address
 *
 * @return macroblock offset legal (boolean)
 */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    int vwidth = mb_width + 1;
    int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
    if (new < boundary || new % vwidth == vwidth - 1)
        return 0;
    *edge_y = new / vwidth;
    *edge_x = new % vwidth;
    return 1;
}
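
/* For illustration (values invented for this example, not from the original
 * source): with mb_width = 4 the virtual width is 5 and column 4 is the
 * padding column. Starting from mb_x = 3, mb_y = 0, an xoffset of +2 yields
 * new = 5, which passes both checks and resolves to edge_x = 0, edge_y = 1:
 * the offset straddles the padding column into the next row, exactly the
 * reference-decoder bug replicated above. */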
static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
{
    return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
}
static av_always_inline
void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[12];
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    VP56mv near_mv[3];
    uint8_t cnt[3] = { 0 };
    VP56RangeCoder *c = &s->c;
    int i;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
        const VP7MVPred * pred = &vp7_mv_pred[i];
        int edge_x, edge_y;

        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
            VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
                                             ? s->macroblocks_base + 1 + edge_x +
                                               (s->mb_width + 1) * (edge_y + 1)
                                             : s->macroblocks + edge_x +
                                               (s->mb_height - edge_y - 1) * 2;
            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
            if (mv) {
                if (AV_RN32A(&near_mv[CNT_NEAREST])) {
                    if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
                        idx = CNT_NEAREST;
                    } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
                        if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
                            continue;
                        idx = CNT_NEAR;
                    } else {
                        AV_WN32A(&near_mv[CNT_NEAR], mv);
                        idx = CNT_NEAR;
                    }
                } else {
                    AV_WN32A(&near_mv[CNT_NEAREST], mv);
                    idx = CNT_NEAREST;
                }
            } else {
                idx = CNT_ZERO;
            }
        } else {
            idx = CNT_ZERO;
        }
        cnt[idx] += vp7_mv_pred[i].score;
    }

    mb->partitioning = VP8_SPLITMVMODE_NONE;

    if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {

            if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {

                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
                else
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR]    ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));

                if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
                } else {
                    mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                mb->mv = near_mv[CNT_NEAR];
                mb->bmv[0] = mb->mv;
            }
        } else {
            mb->mv = near_mv[CNT_NEAREST];
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
static av_always_inline
void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
                                  mb - 1 /* left */,
                                  0      /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    } else {
        mb_edge[0] = mb - s->mb_width - 1;
        mb_edge[2] = mb - s->mb_width - 2;
    }

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
#define MV_EDGE_CHECK(n)                                                      \
    {                                                                         \
        VP8Macroblock *edge = mb_edge[n];                                     \
        int edge_ref = edge->ref_frame;                                       \
        if (edge_ref != VP56_FRAME_CURRENT) {                                 \
            uint32_t mv = AV_RN32A(&edge->mv);                                \
            if (mv) {                                                         \
                if (cur_sign_bias != sign_bias[edge_ref]) {                   \
                    /* SWAR negate of the values in mv. */                    \
                    mv = ~mv;                                                 \
                    mv = ((mv & 0x7fff7fff) +                                 \
                          0x00010001) ^ (mv & 0x80008000);                    \
                }                                                             \
                if (!n || mv != AV_RN32A(&near_mv[idx]))                      \
                    AV_WN32A(&near_mv[++idx], mv);                            \
                cnt[idx] += 1 + (n != 2);                                     \
            } else                                                            \
                cnt[CNT_ZERO] += 1 + (n != 2);                                \
        }                                                                     \
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] &&
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);

                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
                } else {
                    mb->mv.y += vp8_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += vp8_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
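
/* For illustration of the SWAR negate in MV_EDGE_CHECK (not part of the
 * original source): both 16-bit lanes of mv are negated in one 32-bit
 * operation. A lane holding 1 becomes 0xFFFE after ~mv; the masked add sets
 * the low 15 bits to 0x7FFF and the XOR restores the sign bit, giving
 * 0xFFFF = -1 with no carry leaking into the neighbouring lane. */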
static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
{
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

    if (layout) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
    if (keyframe) {
        int x, y;
        uint8_t *top;
        uint8_t *const left = s->intra4x4_pred_mode_left;
        if (layout)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                ctx       = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y]   = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
                                           vp8_pred4x4_prob_inter);
    }
}
static av_always_inline
void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds,
                    VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
{
    VP56RangeCoder *c = &s->c;
    static const char * const vp7_feature_name[] = { "q-index",
                                                     "lf-delta",
                                                     "partial-golden-update",
                                                     "blit-pitch" };
    if (is_vp7) {
        int i;
        for (i = 0; i < 4; i++) {
            if (s->feature_enabled[i]) {
                if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
                    int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
                                                 s->feature_index_prob[i]);
                    av_log(s->avctx, AV_LOG_WARNING,
                           "Feature %s present in macroblock (value 0x%x)\n",
                           vp7_feature_name[i], s->feature_value[i][index]);
                }
            }
        }
    } else if (s->segmentation.update_map) {
        int bit  = vp56_rac_get_prob(c, s->prob->segmentid[0]);
        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
    } else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
                                    vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        } else {
            const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
                                           : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
            if (s->mb_layout)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        }

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                vp8_pred8x8c_prob_intra);
        mb->ref_frame        = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame =
                (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
                                                                   : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame - 1]++;

        // motion vectors, 16.3
        if (is_vp7)
            vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
        else
            vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                s->prob->pred8x8c);
        mb->ref_frame        = VP56_FRAME_CURRENT;
        mb->partitioning     = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}
/**
 * @param r          arithmetic bitstream reader context
 * @param block      destination for block coefficients
 * @param probs      probabilities to use when reading trees from the bitstream
 * @param i          initial coeff index, 0 unless a separate DC block is coded
 * @param qmul       array holding the dc/ac dequant factor at position 0/1
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                 int i, uint8_t *token_prob, int16_t qmul[2],
                                 const uint8_t scan[16], int vp7)
{
    VP56RangeCoder c = *r;
    goto skip_eob;
    do {
        int coeff;
restart:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            if (vp7)
                goto restart;
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i + 1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else {                                             // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else { // DCT_CAT3 and up
                    int a   = vp56_rac_get_prob(&c, token_prob[8]);
                    int b   = vp56_rac_get_prob(&c, token_prob[9 + a]);
                    int cat = (a << 1) + b;
                    coeff   = 3 + (8 << cat);
                    coeff  += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i + 1][2];
        }
        block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}
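
/* For illustration (not part of the original source): the DCT_CAT3-and-up
 * branch derives each category's base value from 3 + (8 << cat), i.e. 11,
 * 19, 35 and 67 for cat 0 through 3, with vp8_rac_get_coeff() then reading
 * the offset within the category. */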
static av_always_inline
int inter_predict_dc(int16_t block[16], int16_t pred[2])
{
    int16_t dc = block[0];
    int ret = 0;

    if (pred[1] > 3) {
        dc += pred[0];
        ret = 1;
    }

    if (!pred[0] | !dc | ((int32_t) pred[0] ^ (int32_t) dc) >> 31) {
        block[0] = pred[0] = dc;
        pred[1] = 0;
    } else {
        if (pred[0] == dc)
            pred[1]++;
        block[0] = pred[0] = dc;
    }

    return ret;
}
static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2],
                                            const uint8_t scan[16])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, scan, IS_VP7);
}

#ifndef vp8_decode_block_coeffs_internal
static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, ff_zigzag_scan, IS_VP8);
}
#endif
/**
 * @param c          arithmetic bitstream reader context
 * @param block      destination for block coefficients
 * @param probs      probabilities to use when reading trees from the bitstream
 * @param i          initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul       array holding the dc/ac dequant factor at position 0/1
 * @param scan       scan pattern (VP7 only)
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
                        uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                        int i, int zero_nhood, int16_t qmul[2],
                        const uint8_t scan[16], int vp7)
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
    return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul, scan)
               : vp8_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul);
}
static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
                      VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
                      int is_vp7)
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;
    int block_dc = 0;

    if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
                                  nnz_pred, s->qmat[segment].luma_dc_qmul,
                                  ff_zigzag_scan, is_vp7);
        l_nnz[8] = t_nnz[8] = !!nnz;

        if (is_vp7 && mb->mode > MODE_I4x4) {
            nnz |= inter_predict_dc(td->block_dc,
                                    s->inter_dc_pred[mb->ref_frame - 1]);
        }

        if (nnz) {
            nnz_total += nnz;
            block_dc   = 1;
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
        }
        luma_start = 1;
        luma_ctx   = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x],
                                      s->prob->token[luma_ctx],
                                      luma_start, nnz_pred,
                                      s->qmat[segment].luma_qmul,
                                      s->prob[0].scan, is_vp7);
            /* nnz+block_dc may be one more than the actual last index,
             * but we don't care */
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
                nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
                                          s->prob->token[2], 0, nnz_pred,
                                          s->qmat[segment].chroma_qmul,
                                          s->prob[0].scan, is_vp7);
                td->non_zero_count_cache[i][(y << 1) + x] = nnz;
                t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked
    // skip, we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}
static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
                      uint8_t *src_cb, uint8_t *src_cr,
                      ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
{
    AV_COPY128(top_border, src_y + 15 * linesize);
    if (!simple) {
        AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
        AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
    }
}
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
                    uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
                    int mb_y, int mb_width, int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border - 32;     // for TL prediction
    src_y  -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a, b, xchg)                                                      \
    do {                                                                      \
        if (xchg)                                                             \
            AV_SWAP64(b, a);                                                  \
        else                                                                  \
            AV_COPY64(b, a);                                                  \
    } while (0)

    XCHG(top_border_m1 + 8, src_y - 8, xchg);
    XCHG(top_border,        src_y,     xchg);
    XCHG(top_border + 8,    src_y + 8, 1);
    if (mb_x < mb_width - 1)
        XCHG(top_border + 32, src_y + 16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1 + 16, src_cb - 8, xchg);
        XCHG(top_border_m1 + 24, src_cr - 8, xchg);
        XCHG(top_border + 16, src_cb, 1);
        XCHG(top_border + 24, src_cr, 1);
    }
}
static av_always_inline
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x)
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
    else
        return mb_y ? mode : LEFT_DC_PRED8x8;
}

static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
{
    if (!mb_x)
        return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
    else
        return mb_y ? mode : HOR_PRED8x8;
}

static av_always_inline
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
{
    switch (mode) {
    case DC_PRED8x8:
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    case VERT_PRED8x8:
        return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
    case HOR_PRED8x8:
        return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
    case PLANE_PRED8x8: /* TM */
        return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
    }
    return mode;
}

static av_always_inline
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
{
    if (!mb_x) {
        return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
    } else {
        return mb_y ? mode : HOR_VP8_PRED;
    }
}

static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
                                     int *copy_buf, int vp7)
{
    switch (mode) {
    case VERT_PRED:
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
    case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
                   * as 16x16/8x8 DC */
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
    }
    return mode;
}
static av_always_inline
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
{
    int x, y, mode, nnz;
    uint32_t tr;

    /* for the first row, we need to run xchg_mb_border to init the top edge
     * to 127; otherwise, skip it if we aren't going to deblock */
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        const uint8_t lo = is_vp7 ? 128 : 127;
        const uint8_t hi = is_vp7 ? 128 : 129;
        uint8_t tr_top[4] = { lo, lo, lo, lo };

        // all blocks on the right edge of the macroblock use the bottom edge
        // of the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (mb_y && mb_x == s->mb_width - 1) {
            tr       = tr_right[-1] * 0x01010101u;
            tr_right = (uint8_t *) &tr;
        }

        if (mb->skip)
            AV_ZERO128(td->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0;
                ptrdiff_t linesize = s->linesize;
                uint8_t *dst = ptr + 4 * x;
                LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);

                if ((y == 0 || x == 3) && mb_y == 0) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
                                                        mb_y + y, &copy, is_vp7);
                if (copy) {
                    dst      = copy_dst + 12;
                    linesize = 8;
                    if (!(mb_y + y)) {
                        copy_dst[3] = lo;
                        AV_WN32A(copy_dst + 4, lo * 0x01010101U);
                    } else {
                        AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
                        if (!(mb_x + x)) {
                            copy_dst[3] = hi;
                        } else {
                            copy_dst[3] = ptr[4 * x - s->linesize - 1];
                        }
                    }
                    if (!(mb_x + x)) {
                        copy_dst[11] =
                        copy_dst[19] =
                        copy_dst[27] =
                        copy_dst[35] = hi;
                    } else {
                        copy_dst[11] = ptr[4 * x                   - 1];
                        copy_dst[19] = ptr[4 * x + s->linesize     - 1];
                        copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
                        copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
                    }
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    AV_COPY32(ptr + 4 * x,                   copy_dst + 12);
                    AV_COPY32(ptr + 4 * x + s->linesize,     copy_dst + 20);
                    AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
                    AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
                }

                nnz = td->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
                                                  td->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr + 4 * x,
                                               td->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr      += 4 * s->linesize;
            intra4x4 += 4;
        }
    }

    mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
                                            mb_x, mb_y, is_vp7);
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
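
/* For illustration (not part of the original source): the rows satisfy
 * subpel_idx[1][mx] == subpel_idx[0][mx] + subpel_idx[2][mx] for every mx;
 * e.g. the positions needing 2 extra pixels on the left also need 3 on the
 * right, 5 extra source pixels per row in total for the interpolation. */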
/**
 * luma MC function
 *
 * @param s        VP8 decoding context
 * @param dst      target buffer for block data at block position
 * @param ref      reference picture buffer at origin (0, 0)
 * @param mv       motion vector (relative to block position) to get pixel data from
 * @param x_off    horizontal position of block from origin (0, 0)
 * @param y_off    vertical position of block from origin (0, 0)
 * @param block_w  width of block (16, 8 or 4)
 * @param block_h  height of block (always same as block_w)
 * @param width    width of src/dst plane data
 * @param height   height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
                 ThreadFrame *ref, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, ptrdiff_t linesize,
                 vp8_mc_func mc_func[3][3])
{
    uint8_t *src = ref->f->data[0];

    if (AV_RN32A(mv)) {
        ptrdiff_t src_linesize = linesize;

        int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        // edge emulation
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx,
                                     width, height);
            src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            src_linesize = EDGE_EMU_LINESIZE;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
                      linesize, block_h, 0, 0);
    }
}
/**
 * chroma MC function
 *
 * @param s        VP8 decoding context
 * @param dst1     target buffer for block data at block position (U plane)
 * @param dst2     target buffer for block data at block position (V plane)
 * @param ref      reference picture buffer at origin (0, 0)
 * @param mv       motion vector (relative to block position) to get pixel data from
 * @param x_off    horizontal position of block from origin (0, 0)
 * @param y_off    vertical position of block from origin (0, 0)
 * @param block_w  width of block (16, 8 or 4)
 * @param block_h  height of block (always same as block_w)
 * @param width    width of src/dst plane data
 * @param height   height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
                   uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
                   int x_off, int y_off, int block_w, int block_h,
                   int width, int height, ptrdiff_t linesize,
                   vp8_mc_func mc_func[3][3])
{
    uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];

    if (AV_RN32A(mv)) {
        int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
        int my = mv->y & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src1 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);

            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src2 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}
static av_always_inline
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                 ThreadFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off, int block_w, int block_h,
                 int width, int height, VP56mv *mv)
{
    VP56mv uvmv = *mv;

    /* Y */
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
                ref_frame, mv, x_off + bx_off, y_off + by_off,
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);

    /* U/V */
    if (s->profile == 3) {
        /* this block only applies to VP8; it is safe to check
         * only the profile, as VP7 profile <= 1 */
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    x_off   >>= 1;
    y_off   >>= 1;
    bx_off  >>= 1;
    by_off  >>= 1;
    width   >>= 1;
    height  >>= 1;
    block_w >>= 1;
    block_h >>= 1;
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
}
/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
static av_always_inline
void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                     int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4;
        int mx = (mb->mv.x >> 2) + x_off + 8;
        int my = (mb->mv.y >> 2) + y_off;
        uint8_t **src = s->framep[ref]->tf.f->data;
        int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
        s->vdsp.prefetch(src[0] + off, s->linesize, 4);
        off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
        s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
    }
}
/**
 * Apply motion vectors to prediction buffer, chapter 18.
 */
static av_always_inline
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16 * s->mb_width, height = 16 * s->mb_height;
    ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
        break;
    case VP8_SPLITMVMODE_4x4: {
        int x, y;
        VP56mv uvmv;

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
                            ref, &bmv[4 * y + x],
                            4 * x + x_off, 4 * y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
            }
        }

        /* U/V */
        x_off  >>= 1;
        y_off  >>= 1;
        width  >>= 1;
        height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[2 * y       * 4 + 2 * x    ].x +
                         mb->bmv[2 * y       * 4 + 2 * x + 1].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
                uvmv.y = mb->bmv[2 * y       * 4 + 2 * x    ].y +
                         mb->bmv[2 * y       * 4 + 2 * x + 1].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
                uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
                uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
                if (s->profile == 3) {
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
                vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
                              dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
                              &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
            }
        }
        break;
    }
    case VP8_SPLITMVMODE_16x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x16:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
        break;
    }
}
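
/* For illustration (not part of the original source): in the 4x4 case each
 * chroma vector is the rounded average of four luma vectors; a component
 * sum of 6 gives (6 + 2 + 0) >> 2 = 2, with the FF_SIGNBIT() term
 * correcting the rounding bias when the sum is negative. */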
static av_always_inline
void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
            if (nnz4) {
                if (nnz4 & ~0x01010101) {
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t) nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
                                                      td->block[y][x], s->linesize);
                        else if ((uint8_t) nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
                                                   td->block[y][x], s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;
                    }
                } else {
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
                }
            }
            y_dst += 4 * s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1 + ch];
            if (nnz4 & ~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t) nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
                                                      td->block[4 + ch][(y << 1) + x],
                                                      s->uvlinesize);
                        else if ((uint8_t) nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
                                                   td->block[4 + ch][(y << 1) + x],
                                                   s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            goto chroma_idct_end;
                    }
                    ch_dst += 4 * s->uvlinesize;
                }
            } else {
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
            }
        }
chroma_idct_end:
        ;
    }
}
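/* The two loops above read four per-subblock non-zero coefficient counts as
 * packed bytes (AV_RL32): a byte equal to 1 means that subblock carries only
 * a DC coefficient, so the cheaper dc_add path suffices.  nnz4 & ~0x01010101
 * is non-zero iff at least one subblock needs the full inverse transform;
 * otherwise the whole row of blocks is handled in one dc_add4y/dc_add4uv
 * call.  Shifting nnz4 right by 8 walks to the next subblock's count and
 * lets the loop exit early once all remaining counts are zero. */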
static av_always_inline
void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
                         VP8FilterStrength *f, int is_vp7)
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[mb->segment];
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];
        filter_level += s->lf_delta.mode[mb->mode];
    }

    filter_level = av_clip_uintp2(filter_level, 6);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit  = interior_limit;
    f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
                      mb->mode == VP8_MVMODE_SPLIT;
}
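/* Worked example of the level derivation above: with a segment filter level
 * of 20 used as a delta (absolute_vals == 0), a base s->filter.level of 30,
 * a reference delta of -6 and a mode delta of 0, filter_level becomes
 * 20 + 30 - 6 = 44, then av_clip_uintp2(44, 6) clamps it into [0, 63].
 * With sharpness 4 the interior limit is FFMIN(44 >> 1, 9 - 4) = 5, and it
 * is never allowed to drop below 1. */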
static av_always_inline
void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
               int mb_x, int mb_y, int is_vp7)
{
    int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    ptrdiff_t linesize   = s->linesize;
    ptrdiff_t uvlinesize = s->uvlinesize;
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };

    if (!filter_level)
        return;

    if (is_vp7) {
        bedge_lim_y  = filter_level;
        bedge_lim_uv = filter_level * 2;
        mbedge_lim   = filter_level + 2;
    } else {
        bedge_lim_y  =
        bedge_lim_uv = filter_level * 2 + inner_limit;
        mbedge_lim   = bedge_lim_y + 4;
    }

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

#define H_LOOP_FILTER_16Y_INNER(cond)                                         \
    if (cond && inner_filter) {                                               \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize,            \
                                             bedge_lim_y, inner_limit,        \
                                             hev_thresh);                     \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize,            \
                                             bedge_lim_y, inner_limit,        \
                                             hev_thresh);                     \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize,           \
                                             bedge_lim_y, inner_limit,        \
                                             hev_thresh);                     \
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,          \
                                             uvlinesize, bedge_lim_uv,        \
                                             inner_limit, hev_thresh);        \
    }

    H_LOOP_FILTER_16Y_INNER(!is_vp7)

    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);

        if (inner_filter) {
            s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
                                                 linesize, bedge_lim_y,
                                                 inner_limit, hev_thresh);
            s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
                                                 linesize, bedge_lim_y,
                                                 inner_limit, hev_thresh);
            s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
                                                 linesize, bedge_lim_y,
                                                 inner_limit, hev_thresh);
            s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                                 dst[2] + 4 * uvlinesize,
                                                 uvlinesize, bedge_lim_uv,
                                                 inner_limit, hev_thresh);
        }
    }

    H_LOOP_FILTER_16Y_INNER(is_vp7)
}
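/* Ordering note for filter_mb(): the left macroblock edge (only when
 * mb_x > 0) is filtered with the stronger mbedge_lim, then the inner
 * vertical edges, then the top edge (when mb_y > 0) and the inner horizontal
 * edges.  VP8 runs the inner vertical pass before the horizontal filtering
 * while VP7 runs it afterwards, which is why H_LOOP_FILTER_16Y_INNER() is
 * invoked twice with opposite conditions. */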
static av_always_inline
void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
                      int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    ptrdiff_t linesize = s->linesize;

    if (!filter_level)
        return;

    bedge_lim  = 2 * filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
        if (inner_filter) {
            s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
            s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
            s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
        }
    }
    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
        if (inner_filter) {
            s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
            s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
            s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
        }
    }
}
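/* The "simple" loop filter operates on the luma plane only (the call site
 * passes dst[0]) with a single edge limit and no high-edge-variance
 * threshold; chroma is left unfiltered in this mode, which is why no
 * uvlinesize parameter is needed here. */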
#define MARGIN (16 << 2)
static av_always_inline
void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
                             VP8Frame *prev_frame, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int mb_x, mb_y;

    s->mv_bounds.mv_min.y = -MARGIN;
    s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        VP8Macroblock *mb = s->macroblocks_base +
                            ((s->mb_width + 1) * (mb_y + 1) + 1);
        int mb_xy = mb_y * s->mb_width;

        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);

        s->mv_bounds.mv_min.x = -MARGIN;
        s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            if (mb_y == 0)
                AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
                         DC_PRED * 0x01010101);
            decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
            s->mv_bounds.mv_min.x -= 64;
            s->mv_bounds.mv_max.x -= 64;
        }
        s->mv_bounds.mv_min.y -= 64;
        s->mv_bounds.mv_max.y -= 64;
    }
}

static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                                   VP8Frame *prev_frame)
{
    vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
}

static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                                   VP8Frame *prev_frame)
{
    vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
}
#if HAVE_THREADS
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)                     \
    do {                                                                      \
        int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);                 \
        if (atomic_load(&otd->thread_mb_pos) < tmp) {                         \
            pthread_mutex_lock(&otd->lock);                                   \
            atomic_store(&td->wait_mb_pos, tmp);                              \
            do {                                                              \
                if (atomic_load(&otd->thread_mb_pos) >= tmp)                  \
                    break;                                                    \
                pthread_cond_wait(&otd->cond, &otd->lock);                    \
            } while (1);                                                      \
            atomic_store(&td->wait_mb_pos, INT_MAX);                          \
            pthread_mutex_unlock(&otd->lock);                                 \
        }                                                                     \
    } while (0)

#define update_pos(td, mb_y, mb_x)                                            \
    do {                                                                      \
        int pos = (mb_y << 16) | (mb_x & 0xFFFF);                             \
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
                               (num_jobs > 1);                                \
        int is_null          = !next_td || !prev_td;                          \
        int pos_check        = (is_null) ? 1 :                                \
            (next_td != td && pos >= atomic_load(&next_td->wait_mb_pos)) ||   \
            (prev_td != td && pos >= atomic_load(&prev_td->wait_mb_pos));     \
        atomic_store(&td->thread_mb_pos, pos);                                \
        if (sliced_threading && pos_check) {                                  \
            pthread_mutex_lock(&td->lock);                                    \
            pthread_cond_broadcast(&td->cond);                                \
            pthread_mutex_unlock(&td->lock);                                  \
        }                                                                     \
    } while (0)
#else
#define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0)
#define update_pos(td, mb_y, mb_x) while(0)
#endif
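/* Sketch of the synchronisation scheme above: a thread publishes its
 * progress through the frame as a single int, (mb_y << 16) | mb_x, so one
 * atomic_load() in check_thread_pos() can compare a full 2-D position.
 * A thread blocks on the other thread's condition variable only while that
 * thread is strictly behind the (mb_x_check, mb_y_check) point it depends
 * on, and update_pos() takes the lock and broadcasts only when some waiter
 * could actually be released, keeping the common case lock-free. */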
static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                                    int jobnr, int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
    int mb_y = atomic_load(&td->thread_mb_pos) >> 16;
    int mb_x, mb_xy = mb_y * s->mb_width;
    int num_jobs = s->num_jobs;
    VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
    VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
        curframe->tf.f->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->tf.f->data[2] +  8 * mb_y * s->uvlinesize
    };

    if (c->end <= c->buffer && c->bits >= 0)
        return AVERROR_INVALIDDATA;

    if (mb_y == 0)
        prev_td = td;
    else
        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = td;
    else
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];
    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    else {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
    }

    if (!is_vp7 || mb_y == 0)
        memset(td->left_nnz, 0, sizeof(td->left_nnz));

    td->mv_bounds.mv_min.x = -MARGIN;
    td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        if (c->end <= c->buffer && c->bits >= 0)
            return AVERROR_INVALIDDATA;
        // Wait for previous thread to read mb_x+2, and reach mb_y-1.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td,
                                 mb_x + (is_vp7 ? 2 : 1),
                                 mb_y - (is_vp7 ? 2 : 1));
            } else {
                check_thread_pos(td, prev_td,
                                 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
                                 mb_y - (is_vp7 ? 2 : 1));
            }
        }

        s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
                         s->linesize, 4);
        s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
                         dst[2] - dst[1], 2);

        if (!s->mb_layout)
            decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (!mb->skip) {
            idct_mb(s, td, dst, mb);
        } else {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned

            /* Reset DC block predictors if they would exist
             * if the mb had coefficients */
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8]     = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);

        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        td->mv_bounds.mv_min.x -= 64;
        td->mv_bounds.mv_max.x -= 64;

        if (mb_x == s->mb_width + 1) {
            update_pos(td, mb_y, s->mb_width + 3);
        } else {
            update_pos(td, mb_y, mb_x);
        }
    }

    return 0;
}

static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                       int jobnr, int threadnr)
{
    return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
}

static int vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                       int jobnr, int threadnr)
{
    return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
}
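/* A note on the check_thread_pos() lags in the row loop above: each row
 * waits until the thread decoding the row above has advanced a couple of
 * macroblocks past the current column before touching its pixels for
 * prediction.  VP7 appears to need a two-macroblock lag where VP8 gets by
 * with one, and thread 0, which wraps around to the job that decoded
 * num_jobs rows earlier, adds an extra s->mb_width + 3 of slack to cover
 * the wrap.  The range coder check at the top of each iteration bails out
 * with AVERROR_INVALIDDATA once the partition has been over-read. */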
static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
                                           int jobnr, int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    int mb_x, mb_y = atomic_load(&td->thread_mb_pos) >> 16, num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe->tf.f;
    VP8Macroblock *mb;
    VP8ThreadData *prev_td, *next_td;
    uint8_t *dst[3] = {
        curframe->data[0] + 16 * mb_y * s->linesize,
        curframe->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->data[2] +  8 * mb_y * s->uvlinesize
    };

    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    else
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;

    if (mb_y == 0)
        prev_td = td;
    else
        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = td;
    else
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];
        if (prev_td != td)
            check_thread_pos(td, prev_td,
                             (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
        if (next_td != td)
            if (next_td != &s->thread_data[0])
                check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);

        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        else
            filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
    }
}

static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
}

static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
}
static av_always_inline
int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
                              int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[jobnr];
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
    VP8Frame *curframe = s->curframe;
    int mb_y, num_jobs = s->num_jobs;
    int ret;

    td->thread_nr = threadnr;
    td->mv_bounds.mv_min.y = -MARGIN - 64 * threadnr;
    td->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr;
    for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
        atomic_store(&td->thread_mb_pos, mb_y << 16);
        ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
        if (ret < 0) {
            update_pos(td, s->mb_height, INT_MAX & 0xFFFF);
            return ret;
        }
        if (s->deblock_filter)
            s->filter_mb_row(avctx, tdata, jobnr, threadnr);
        update_pos(td, mb_y, INT_MAX & 0xFFFF);

        td->mv_bounds.mv_min.y -= 64 * num_jobs;
        td->mv_bounds.mv_max.y -= 64 * num_jobs;

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            ff_thread_report_progress(&curframe->tf, mb_y, 0);
    }

    return 0;
}

static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
}

static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
}
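/* Rows are interleaved across jobs rather than split into contiguous bands:
 * job 0 decodes rows 0, num_jobs, 2 * num_jobs, ..., job 1 decodes rows 1,
 * 1 + num_jobs, and so on.  With num_jobs == 2, for instance, even rows go
 * to job 0 and odd rows to job 1, which keeps neighbouring rows on different
 * threads and lets the check_thread_pos()/update_pos() handshake pipeline
 * them down the frame. */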
static av_always_inline
int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                      AVPacket *avpkt, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int ret, i, referenced, num_jobs;
    enum AVDiscard skip_thresh;
    VP8Frame *av_uninit(curframe), *prev_frame;

    if (is_vp7)
        ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
    else
        ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);

    if (ret < 0)
        goto err;

    if (s->actually_webp) {
        // avctx->pix_fmt already set in caller.
    } else if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) {
        enum AVPixelFormat pix_fmts[] = {
#if CONFIG_VP8_VAAPI_HWACCEL
            AV_PIX_FMT_VAAPI,
#endif
#if CONFIG_VP8_NVDEC_HWACCEL
            AV_PIX_FMT_CUDA,
#endif
            AV_PIX_FMT_YUV420P,
            AV_PIX_FMT_NONE,
        };

        s->pix_fmt = ff_get_format(s->avctx, pix_fmts);
        if (s->pix_fmt < 0) {
            ret = AVERROR(EINVAL);
            goto err;
        }
        avctx->pix_fmt = s->pix_fmt;
    }

    prev_frame = s->framep[VP56_FRAME_CURRENT];

    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
                 s->update_altref == VP56_FRAME_CURRENT;

    skip_thresh = !referenced ? AVDISCARD_NONREF
                              : !s->keyframe ? AVDISCARD_NONKEY
                                             : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
        goto skip_decode;
    }
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // release no longer referenced frames
    for (i = 0; i < 5; i++)
        if (s->frames[i].tf.f->buf[0] &&
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            vp8_release_frame(s, &s->frames[i]);

    curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);

    if (!s->colorspace)
        avctx->colorspace = AVCOL_SPC_BT470BG;
    if (s->fullrange)
        avctx->color_range = AVCOL_RANGE_JPEG;
    else
        avctx->color_range = AVCOL_RANGE_MPEG;

    /* Given that arithmetic probabilities are updated every frame, it's quite
     * likely that the values we have on a random interframe are complete
     * junk if we didn't start decode on a keyframe. So just don't display
     * anything rather than junk. */
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN]   ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING,
               "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
        goto err;
    }

    curframe->tf.f->key_frame = s->keyframe;
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
                                            : AV_PICTURE_TYPE_P;
    if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
        goto err;

    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE)
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
    else
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];

    if (s->update_golden != VP56_FRAME_NONE)
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
    else
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];

    if (s->update_last)
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    else
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];

    s->next_framep[VP56_FRAME_CURRENT] = curframe;

    ff_thread_finish_setup(avctx);

    if (avctx->hwaccel) {
        ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
        if (ret < 0)
            goto err;

        ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
        if (ret < 0)
            goto err;

        ret = avctx->hwaccel->end_frame(avctx);
        if (ret < 0)
            goto err;
    } else {
        s->linesize   = curframe->tf.f->linesize[0];
        s->uvlinesize = curframe->tf.f->linesize[1];

        memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
        /* Zero macroblock structures for top/top-left prediction
         * from outside the frame. */
        if (!s->mb_layout)
            memset(s->macroblocks + s->mb_height * 2 - 1, 0,
                   (s->mb_width + 1) * sizeof(*s->macroblocks));
        if (!s->mb_layout && s->keyframe)
            memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);

        memset(s->ref_count, 0, sizeof(s->ref_count));

        if (s->mb_layout == 1) {
            // Make sure the previous frame has read its segmentation map,
            // if we re-use the same map.
            if (prev_frame && s->segmentation.enabled &&
                !s->segmentation.update_map)
                ff_thread_await_progress(&prev_frame->tf, 1, 0);
            if (is_vp7)
                vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
            else
                vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
        }

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            num_jobs = 1;
        else
            num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
        s->num_jobs   = num_jobs;
        s->curframe   = curframe;
        s->prev_frame = prev_frame;
        s->mv_bounds.mv_min.y = -MARGIN;
        s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
        for (i = 0; i < MAX_THREADS; i++) {
            VP8ThreadData *td = &s->thread_data[i];
            atomic_init(&td->thread_mb_pos, 0);
            atomic_init(&td->wait_mb_pos, INT_MAX);
        }
        if (is_vp7)
            avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
                            num_jobs);
        else
            avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
                            num_jobs);
    }

    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
        if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
            return ret;
        *got_frame = 1;
    }

    return avpkt->size;
err:
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
    return ret;
}
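/* Reference-frame bookkeeping above is double-buffered: the new assignments
 * for last/golden/altref are staged in s->next_framep[] and only committed
 * to s->framep[] once the frame has fully decoded, so the error path (err:)
 * can roll back by copying the old framep[] into next_framep[] instead. */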
int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                        AVPacket *avpkt)
{
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
}

#if CONFIG_VP7_DECODER
static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                            AVPacket *avpkt)
{
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */
av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int i;

    vp8_decode_flush_impl(avctx, 1);
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        av_frame_free(&s->frames[i].tf.f);

    return 0;
}

static av_cold int vp8_init_frames(VP8Context *s)
{
    int i;
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
        s->frames[i].tf.f = av_frame_alloc();
        if (!s->frames[i].tf.f)
            return AVERROR(ENOMEM);
    }
    return 0;
}
static av_always_inline
int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx   = avctx;
    s->vp7     = avctx->codec->id == AV_CODEC_ID_VP7;
    s->pix_fmt = AV_PIX_FMT_NONE;
    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
    avctx->internal->allocate_progress = 1;

    ff_videodsp_init(&s->vdsp, 8);

    ff_vp78dsp_init(&s->vp8dsp);
    if (CONFIG_VP7_DECODER && is_vp7) {
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
        ff_vp7dsp_init(&s->vp8dsp);
        s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
        s->filter_mb_row           = vp7_filter_mb_row;
    } else if (CONFIG_VP8_DECODER && !is_vp7) {
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
        ff_vp8dsp_init(&s->vp8dsp);
        s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
        s->filter_mb_row           = vp8_filter_mb_row;
    }

    /* does not change for VP8 */
    memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));

    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
        return ret;
    }

    return 0;
}

#if CONFIG_VP7_DECODER
static int vp7_decode_init(AVCodecContext *avctx)
{
    return vp78_decode_init(avctx, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */

av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
{
    return vp78_decode_init(avctx, IS_VP8);
}
#if CONFIG_VP8_DECODER
#if HAVE_THREADS
static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;

    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
        return ret;
    }

    return 0;
}

#define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
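/* REBASE translates a frame pointer from the source thread's context into
 * the corresponding slot of this thread's s->frames[] array: the pointer
 * arithmetic relies on both contexts keeping their VP8Frame objects in
 * identically laid out arrays, so only the array index is carried over. */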
static int vp8_decode_update_thread_context(AVCodecContext *dst,
                                            const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
    int i;

    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        free_buffers(s);
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    s->pix_fmt      = s_src->pix_fmt;
    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta     = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
        if (s_src->frames[i].tf.f->buf[0]) {
            int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
            if (ret < 0)
                return ret;
        }
    }

    s->framep[0] = REBASE(s_src->next_framep[0]);
    s->framep[1] = REBASE(s_src->next_framep[1]);
    s->framep[2] = REBASE(s_src->next_framep[2]);
    s->framep[3] = REBASE(s_src->next_framep[3]);

    return 0;
}
#endif /* HAVE_THREADS */
#endif /* CONFIG_VP8_DECODER */
#if CONFIG_VP7_DECODER
AVCodec ff_vp7_decoder = {
    .name                  = "vp7",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP7"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP7,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = vp7_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = vp7_decode_frame,
    .capabilities          = AV_CODEC_CAP_DR1,
    .flush                 = vp8_decode_flush,
};
#endif /* CONFIG_VP7_DECODER */

#if CONFIG_VP8_DECODER
AVCodec ff_vp8_decoder = {
    .name                  = "vp8",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP8,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = ff_vp8_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = ff_vp8_decode_frame,
    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
                             AV_CODEC_CAP_SLICE_THREADS,
    .flush                 = vp8_decode_flush,
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
    .hw_configs            = (const AVCodecHWConfigInternal*[]) {
#if CONFIG_VP8_VAAPI_HWACCEL
                               HWACCEL_VAAPI(vp8),
#endif
#if CONFIG_VP8_NVDEC_HWACCEL
                               HWACCEL_NVDEC(vp8),
#endif
                               NULL
                           },
};
#endif /* CONFIG_VP8_DECODER */