/*
 * VP7/VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Fiona Glaser
 * Copyright (C) 2012 Daniel Kang
 * Copyright (C) 2014 Peter Ross
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "libavutil/imgutils.h"

#include "rectangle.h"
#if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
#define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
#elif CONFIG_VP7_DECODER
#define VPX(vp7, f) vp7_ ## f
#else // CONFIG_VP8_DECODER
#define VPX(vp7, f) vp8_ ## f
#endif
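
/* Usage note: VPX(is_vp7, some_func) selects the codec-specific variant;
 * for instance VPX(1, decode_mvs) names vp7_decode_mvs, and when only one
 * of the two decoders is configured the ternary disappears entirely at
 * compile time. */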
static void free_buffers(VP8Context *s)
{
    int i;
    if (s->thread_data)
        for (i = 0; i < MAX_THREADS; i++) {
#if HAVE_THREADS
            pthread_cond_destroy(&s->thread_data[i].cond);
            pthread_mutex_destroy(&s->thread_data[i].lock);
#endif
            av_freep(&s->thread_data[i].filter_strength);
        }
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}
static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    int ret;
    if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                    ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
        return ret;
    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
        ff_thread_release_buffer(s->avctx, &f->tf);
        return AVERROR(ENOMEM);
    }
    return 0;
}
static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
}
#if CONFIG_VP8_DECODER
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;
    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }

    return 0;
}
#endif /* CONFIG_VP8_DECODER */
static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(s, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem)
        free_buffers(s);
}
static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0);
}
static VP8Frame *vp8_find_free_buffer(VP8Context *s)
{
    VP8Frame *frame = NULL;
    int i;

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT]  &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            frame = &s->frames[i];
            break;
        }
    if (i == 5) {
        av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (frame->tf.f->data[0])
        vp8_release_frame(s, frame);

    return frame;
}
static av_always_inline
int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
{
    AVCodecContext *avctx = s->avctx;
    int i, ret;

    if (width  != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
        height != s->avctx->height) {
        vp8_decode_flush_impl(s->avctx, 1);

        ret = ff_set_dimensions(s->avctx, width, height);
        if (ret < 0)
            return ret;
    }

    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
    s->mb_height = (s->avctx->coded_height + 15) / 16;

    s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
                   avctx->thread_count > 1;
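
    /* Sliced threading (and VP7) needs macroblock metadata for the whole
     * frame at once, hence the (mb_width + 2) * (mb_height + 2) array in the
     * else branch below; the smaller single-row layout used otherwise keeps
     * each row offset by two entries so the top and top-left neighbours of
     * the current macroblock remain addressable. */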
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
                                               sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
    } else // Sliced threading
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
                                         sizeof(*s->macroblocks));
    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));

    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
        free_buffers(s);
        return AVERROR(ENOMEM);
    }

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength =
            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
        if (!s->thread_data[i].filter_strength) {
            free_buffers(s);
            return AVERROR(ENOMEM);
        }
#if HAVE_THREADS
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
    }

    s->macroblocks = s->macroblocks_base + 1;

    return 0;
}
static int vp7_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP7);
}

static int vp8_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP8);
}
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map = vp8_rac_get(c);

    if (vp8_rac_get(c)) { // update segment feature data
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    buf      += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);
    if (buf_size < 0)
        return -1;
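
    /* The coefficient data is preceded by (num_coeff_partitions - 1)
     * little-endian 24-bit length fields; the final partition simply takes
     * whatever bytes remain. E.g. with 4 partitions, `sizes` points at nine
     * bytes holding three AV_RL24() values. */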
    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
        int size = AV_RL24(sizes + 3 * i);
        if (buf_size - size < 0)
            return -1;

        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        buf      += size;
        buf_size -= size;
    }
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}
static void vp7_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int yac_qi  = vp8_rac_get_uint(c, 7);
    int ydc_qi  = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;

    s->qmat[0].luma_qmul[0]    = vp7_ydc_qlookup[ydc_qi];
    s->qmat[0].luma_qmul[1]    = vp7_yac_qlookup[yac_qi];
    s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
    s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
    s->qmat[0].chroma_qmul[0]  = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
    s->qmat[0].chroma_qmul[1]  = vp7_yac_qlookup[uvac_qi];
}
static void vp8_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);
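
    /* Illustration: with yac_qi = 60 and all deltas zero, every segment gets
     * base_qi = 60, so luma_qmul[1] = vp8_ac_qlookup[60] and the Y2 AC factor
     * below becomes vp8_ac_qlookup[60] * 101581 >> 16, i.e. the AC factor
     * scaled by roughly 1.55 (101581 / 65536). */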
    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;
        } else
            base_qi = yac_qi;

        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta,  7)];
        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,              7)];
        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)] * 2;
        /* 101581 >> 16 is equivalent to 155 / 100 */
        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] * 101581 >> 16;
        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];

        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}
/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so this follows what libvpx appears to do:
 *
 * Intra frames update all 3 references.
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set.
 * If the update (golden|altref) flag is set, it's updated with the current frame
 *     if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 *     0: no update
 *     1: VP56_FRAME_PREVIOUS
 *     2: update golden with altref, or update altref with golden
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;

    if (update)
        return VP56_FRAME_CURRENT;

    switch (vp8_rac_get_uint(c, 2)) {
    case 1:
        return VP56_FRAME_PREVIOUS;
    case 2:
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    }
    return VP56_FRAME_NONE;
}
static void vp78_reset_probability_tables(VP8Context *s)
{
    int i, j;
    for (i = 0; i < 4; i++)
        for (j = 0; j < 16; j++)
            memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                   sizeof(s->prob->token[i][j]));
}
static void vp78_update_probability_tables(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, j, k, l, m;

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS - 1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }
}
#define VP7_MVC_SIZE 17
#define VP8_MVC_SIZE 19

static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
                                                            int mvc_size)
{
    VP56RangeCoder *c = &s->c;
    int i, j;

    if (vp8_rac_get(c))
        for (i = 0; i < 4; i++)
            s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
    if (vp8_rac_get(c))
        for (i = 0; i < 3; i++)
            s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);

    // 17.2 MV probability update
    for (i = 0; i < 2; i++)
        for (j = 0; j < mvc_size; j++)
            if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                s->prob->mvc[i][j] = vp8_rac_get_nn(c);
}
static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}
static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
{
    int i, j;

    for (j = 1; j < 3; j++) {
        for (i = 0; i < height / 2; i++)
            memcpy(dst->data[j] + i * dst->linesize[j],
                   src->data[j] + i * src->linesize[j], width / 2);
    }
}
static void fade(uint8_t *dst, int dst_linesize,
                 const uint8_t *src, int src_linesize,
                 int width, int height,
                 int alpha, int beta)
{
    int i, j;
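
    /* Per-pixel fade: dst = clip(y + y * beta / 256 + alpha). For example,
     * alpha = -16 with beta = 0 darkens every luma sample by 16, while
     * beta = -64 scales luma by roughly 3/4 before the offset is added. */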
    for (j = 0; j < height; j++) {
        for (i = 0; i < width; i++) {
            uint8_t y = src[j * src_linesize + i];
            dst[j * dst_linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
        }
    }
}
static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
{
    int alpha = (int8_t) vp8_rac_get_uint(c, 8);
    int beta  = (int8_t) vp8_rac_get_uint(c, 8);
    int ret;

    if (!s->keyframe && (alpha || beta)) {
        int width  = s->mb_width * 16;
        int height = s->mb_height * 16;
        AVFrame *src, *dst;

        if (!s->framep[VP56_FRAME_PREVIOUS] ||
            !s->framep[VP56_FRAME_GOLDEN]) {
            av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
            return AVERROR_INVALIDDATA;
        }

        dst =
        src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

        /* preserve the golden frame, write a new previous frame */
        if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
            s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
            if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
                return ret;

            dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

            copy_chroma(dst, src, width, height);
        }

        fade(dst->data[0], dst->linesize[0],
             src->data[0], src->linesize[0],
             width, height, alpha, beta);
    }

    return 0;
}
static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int part1_size, hscale, vscale, i, j, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    if (buf_size < 4) {
        return AVERROR_INVALIDDATA;
    }

    s->profile = (buf[0] >> 1) & 7;
    if (s->profile > 1) {
        avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->invisible = 0;
    part1_size   = AV_RL24(buf) >> 4;

    if (buf_size < 4 - s->profile + part1_size) {
        av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, 4 - s->profile + part1_size);
        return AVERROR_INVALIDDATA;
    }

    buf      += 4 - s->profile;
    buf_size -= 4 - s->profile;

    memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));

    ff_vp56_init_range_decoder(c, buf, part1_size);
    buf      += part1_size;
    buf_size -= part1_size;

    /* A. Dimension information (keyframes only) */
    if (s->keyframe) {
        width  = vp8_rac_get_uint(c, 12);
        height = vp8_rac_get_uint(c, 12);
        hscale = vp8_rac_get_uint(c, 2);
        vscale = vp8_rac_get_uint(c, 2);
        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        for (i = 0; i < 2; i++)
            memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
                   sizeof(vp7_mv_default_prob[i]));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
        memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
    }

    if (s->keyframe || s->profile > 0)
        memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));

    /* B. Decoding information for all four macroblock-level features */
    for (i = 0; i < 4; i++) {
        s->feature_enabled[i] = vp8_rac_get(c);
        if (s->feature_enabled[i]) {
            s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);

            for (j = 0; j < 3; j++)
                s->feature_index_prob[i][j] =
                    vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;

            if (vp7_feature_value_size[s->profile][i])
                for (j = 0; j < 4; j++)
                    s->feature_value[i][j] =
                        vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
        }
    }

    s->segmentation.enabled    = 0;
    s->segmentation.update_map = 0;
    s->lf_delta.enabled        = 0;

    s->num_coeff_partitions = 1;
    ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
        if ((ret = vp7_update_dimensions(s, width, height)) < 0)
            return ret;
    }

    /* C. Dequantization indices */
    vp7_get_quants(s);

    /* D. Golden frame update flag (a Flag) for interframes only */
    if (!s->keyframe) {
        s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
        s->sign_bias[VP56_FRAME_GOLDEN] = 0;
    }

    s->update_last          = 1;
    s->update_probabilities = 1;
    s->fade_present         = 1;

    if (s->profile > 0) {
        s->update_probabilities = vp8_rac_get(c);
        if (!s->update_probabilities)
            s->prob[1] = s->prob[0];

        if (!s->keyframe)
            s->fade_present = vp8_rac_get(c);
    }

    /* E. Fading information for previous frame */
    if (s->fade_present && vp8_rac_get(c)) {
        if ((ret = vp7_fade_frame(s, c)) < 0)
            return ret;
    }

    /* F. Loop filter type */
    if (!s->profile)
        s->filter.simple = vp8_rac_get(c);

    /* G. DCT coefficient ordering specification */
    if (vp8_rac_get(c))
        for (i = 1; i < 16; i++)
            s->prob[0].scan[i] = zigzag_scan[vp8_rac_get_uint(c, 4)];

    /* H. Loop filter levels */
    if (s->profile > 0)
        s->filter.simple = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
    vp78_update_probability_tables(s);

    s->mbskip_enabled = 0;

    /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
    if (!s->keyframe) {
        s->prob->intra = vp8_rac_get_uint(c, 8);
        s->prob->last  = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
    }

    return 0;
}
static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    if (buf_size < 3) {
        av_log(s->avctx, AV_LOG_ERROR, "Insufficient data (%d) for header\n", buf_size);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->profile   = (buf[0] >> 1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
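
    /* The 3-byte frame tag packs, from bit 0 upward: the inverted keyframe
     * flag, a 3-bit profile, the show_frame flag at bit 4, and the 19-bit
     * size of the first partition in bits 5-23. For instance (an
     * illustrative value, not from any real stream) buf[] = { 0x12, 0x34,
     * 0x00 } gives a visible keyframe, profile 1, and
     * header_size = 0x003412 >> 5 = 416. */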
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
               sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
               sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7 * s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf + 3) & 0x3fff;
        height = AV_RL16(buf + 5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc, vp8_mv_default_prob,
               sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        s->colorspace = vp8_rac_get(c);
        if (s->colorspace)
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        s->fullrange = vp8_rac_get(c);
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height)
        if ((ret = vp8_update_dimensions(s, width, height)) < 0)
            return ret;

    vp8_get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    vp78_update_probability_tables(s);

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
    }

    return 0;
}
static av_always_inline
void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
{
    dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
                             av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
    dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
                             av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));
}
/**
 * Motion vector coding, 17.1.
 */
static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int i;

        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = (vp7 ? 7 : 9); i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p + 2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3 * bit;
        x  += 4 * bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2 * bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}
static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 1);
}

static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 0);
}
static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
{
    if (is_vp7)
        return vp7_submv_prob;

    if (left == top)
        return vp8_submv_prob[4 - !!left];
    if (!top)
        return vp8_submv_prob[2];
    return vp8_submv_prob[1 - !!left];
}
/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                    int layout, int is_vp7)
{
    int part_idx;
    int n, num;
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
    VP56mv *top_mv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width - 1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv       = top_mb->bmv;

    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        else
            part_idx = VP8_SPLITMVMODE_8x8;
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num          = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx];
    firstidx     = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above, is_vp7);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y +
                                   read_mv_component(c, s->prob->mvc[0], is_vp7);
                    mb->bmv[n].x = mb->mv.x +
                                   read_mv_component(c, s->prob->mvc[1], is_vp7);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}
/**
 * The vp7 reference decoder uses a padding macroblock column (added to the
 * right edge of the frame) to guard against illegal macroblock offsets. The
 * algorithm has bugs that permit offsets to straddle the padding column.
 * This function replicates those bugs.
 *
 * @param[out] edge_x macroblock x address
 * @param[out] edge_y macroblock y address
 *
 * @return macroblock offset legal (boolean)
 */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    int vwidth = mb_width + 1;
    int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
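    /* Example: with mb_width = 10 (vwidth = 11), mb_x = 9, mb_y = 0 and
     * xoffset = 1, new = 10, which falls on the padding column
     * (new % vwidth == vwidth - 1), so the offset is rejected. */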
    if (new < boundary || new % vwidth == vwidth - 1)
        return 0;
    *edge_y = new / vwidth;
    *edge_x = new % vwidth;
    return 1;
}
static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
{
    return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
}
static av_always_inline
void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[12];
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    VP56mv near_mv[3];
    uint8_t cnt[3] = { 0 };
    VP56RangeCoder *c = &s->c;
    int i;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
        const VP7MVPred *pred = &vp7_mv_pred[i];
        int edge_x, edge_y;

        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
            VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
                                             ? s->macroblocks_base + 1 + edge_x +
                                               (s->mb_width + 1) * (edge_y + 1)
                                             : s->macroblocks + edge_x +
                                               (s->mb_height - edge_y - 1) * 2;
            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
            if (mv) {
                if (AV_RN32A(&near_mv[CNT_NEAREST])) {
                    if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
                        idx = CNT_NEAREST;
                    } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
                        if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
                            continue;
                        idx = CNT_NEAR;
                    } else {
                        AV_WN32A(&near_mv[CNT_NEAR], mv);
                        idx = CNT_NEAR;
                    }
                } else {
                    AV_WN32A(&near_mv[CNT_NEAREST], mv);
                    idx = CNT_NEAREST;
                }
            } else {
                idx = CNT_ZERO;
            }
        } else {
            idx = CNT_ZERO;
        }
        cnt[idx] += vp7_mv_pred[i].score;
    }

    mb->partitioning = VP8_SPLITMVMODE_NONE;

    if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {

            if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {

                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
                else
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR]    ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));

                if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
                } else {
                    mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                mb->mv = near_mv[CNT_NEAR];
                mb->bmv[0] = mb->mv;
            }
        } else {
            mb->mv = near_mv[CNT_NEAREST];
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
static av_always_inline
void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
                                  mb - 1 /* left */,
                                  0      /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    } else {
        mb_edge[0] = mb - s->mb_width - 1;
        mb_edge[2] = mb - s->mb_width - 2;
    }

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
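
    /* The sign-bias fixup in MV_EDGE_CHECK below negates both packed 16-bit
     * MV components at once: ~mv, then a per-halfword +1 that masks out the
     * sign bits so the increment cannot carry between the two halves. E.g.
     * mv = 0x0001fffe (one component 1, the other -2) becomes 0xffff0002
     * (-1 and 2). */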
#define MV_EDGE_CHECK(n)                                                      \
    {                                                                         \
        VP8Macroblock *edge = mb_edge[n];                                     \
        int edge_ref = edge->ref_frame;                                       \
        if (edge_ref != VP56_FRAME_CURRENT) {                                 \
            uint32_t mv = AV_RN32A(&edge->mv);                                \
            if (mv) {                                                         \
                if (cur_sign_bias != sign_bias[edge_ref]) {                   \
                    /* SWAR negate of the values in mv. */                    \
                    mv = ~mv;                                                 \
                    mv = ((mv & 0x7fff7fff) +                                 \
                          0x00010001) ^ (mv & 0x80008000);                    \
                }                                                             \
                if (!n || mv != AV_RN32A(&near_mv[idx]))                      \
                    AV_WN32A(&near_mv[++idx], mv);                            \
                cnt[idx] += 1 + (n != 2);                                     \
            } else                                                            \
                cnt[CNT_ZERO] += 1 + (n != 2);                                \
        }                                                                     \
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)
    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] &&
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
                } else {
                    mb->mv.y  += vp8_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x  += vp8_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
{
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

    if (layout == 1) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
    if (keyframe) {
        int x, y;
        uint8_t *top;
        uint8_t *const left = s->intra4x4_pred_mode_left;
        if (layout == 1)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                ctx       = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y]   = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
                                           vp8_pred4x4_prob_inter);
    }
}
static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
{
    VP56RangeCoder *c = &s->c;
    const char *vp7_feature_name[] = { "q-index",
                                       "lf-delta",
                                       "partial-golden-update",
                                       "blit-pitch" };
    if (is_vp7) {
        int i;
        for (i = 0; i < 4; i++) {
            if (s->feature_enabled[i]) {
                if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
                    int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
                                                 s->feature_index_prob[i]);
                    av_log(s->avctx, AV_LOG_WARNING,
                           "Feature %s present in macroblock (value 0x%x)\n",
                           vp7_feature_name[i], s->feature_value[i][index]);
                }
            }
        }
    } else if (s->segmentation.update_map) {
        int bit  = vp56_rac_get_prob(c, s->prob->segmentid[0]);
        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1 + bit]) + 2 * bit;
    } else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
                                    vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        } else {
            const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
                                           : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
            if (s->mb_layout)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        }

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                vp8_pred8x8c_prob_intra);
        mb->ref_frame        = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame =
                (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
                                                                   : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame - 1]++;

        // motion vectors, 16.3
        if (is_vp7)
            vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
        else
            vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                s->prob->pred8x8c);
        mb->ref_frame        = VP56_FRAME_CURRENT;
        mb->partitioning     = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}
/**
 * @param r     arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i     initial coeff index, 0 unless a separate DC block is coded
 * @param qmul  array holding the dc/ac dequant factor at position 0/1
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                 int i, uint8_t *token_prob, int16_t qmul[2],
                                 const uint8_t scan[16], int vp7)
{
    VP56RangeCoder c = *r;
    goto skip_eob;
    do {
        int coeff;
restart:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            if (vp7)
                goto restart;
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff      = 1;
            token_prob = probs[i + 1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else {                                             // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else { // DCT_CAT3 and up
                    int a   = vp56_rac_get_prob(&c, token_prob[8]);
                    int b   = vp56_rac_get_prob(&c, token_prob[9 + a]);
                    int cat = (a << 1) + b;
                    coeff   = 3 + (8 << cat);
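                    /* cat selects DCT_CAT3..DCT_CAT6: base values 11, 19, 35
                     * and 67 (3 + 8 << cat), matching the token value ranges
                     * 11-18, 19-34, 35-66 and 67 upward. */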
                    coeff  += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i + 1][2];
        }
        block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}
static av_always_inline
int inter_predict_dc(int16_t block[16], int16_t pred[2])
{
    int16_t dc = block[0];
    int ret = 0;

    if (pred[1] > 3) {
        dc += pred[0];
        ret = 1;
    }
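
    /* The test below uses bitwise ORs to avoid short-circuit branches; it
     * fires when pred[0] is zero, dc is zero, or the two differ in sign
     * (the XOR's sign bit), and in that case the prediction run restarts. */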
    if (!pred[0] | !dc | ((int32_t) pred[0] ^ (int32_t) dc) >> 31) {
        block[0] = pred[0] = dc;
        pred[1]  = 0;
    } else {
        if (pred[0] == dc)
            pred[1]++;
        block[0] = pred[0] = dc;
    }

    return ret;
}
static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2],
                                            const uint8_t scan[16])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, scan, IS_VP7);
}

#ifndef vp8_decode_block_coeffs_internal
static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, zigzag_scan, IS_VP8);
}
#endif
/**
 * @param c          arithmetic bitstream reader context
 * @param block      destination for block coefficients
 * @param probs      probabilities to use when reading trees from the bitstream
 * @param i          initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul       array holding the dc/ac dequant factor at position 0/1
 * @param scan       scan pattern (VP7 only)
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
                        uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                        int i, int zero_nhood, int16_t qmul[2],
                        const uint8_t scan[16], int vp7)
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
    return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul, scan)
               : vp8_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul);
}
static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
                      VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
                      int is_vp7)
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;
    int block_dc = 0;

    if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
                                  nnz_pred, s->qmat[segment].luma_dc_qmul,
                                  zigzag_scan, is_vp7);
        l_nnz[8] = t_nnz[8] = !!nnz;

        if (is_vp7 && mb->mode > MODE_I4x4) {
            nnz |= inter_predict_dc(td->block_dc,
                                    s->inter_dc_pred[mb->ref_frame - 1]);
        }

        if (nnz) {
            nnz_total += nnz;
            block_dc   = 1;
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
        }
        luma_start = 1;
        luma_ctx   = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x],
                                      s->prob->token[luma_ctx],
                                      luma_start, nnz_pred,
                                      s->qmat[segment].luma_qmul,
                                      s->prob[0].scan, is_vp7);
            /* nnz+block_dc may be one more than the actual last index,
             * but we don't care */
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
                nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
                                          s->prob->token[2], 0, nnz_pred,
                                          s->qmat[segment].chroma_qmul,
                                          s->prob[0].scan, is_vp7);
                td->non_zero_count_cache[i][(y << 1) + x] = nnz;
                t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}
static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
                      uint8_t *src_cb, uint8_t *src_cr,
                      int linesize, int uvlinesize, int simple)
{
    AV_COPY128(top_border, src_y + 15 * linesize);
    if (!simple) {
        AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
        AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
    }
}
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
                    uint8_t *src_cr, int linesize, int uvlinesize, int mb_x,
                    int mb_y, int mb_width, int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border - 32; // for TL prediction
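
    /* Each macroblock's saved top border occupies 32 bytes: 16 luma samples,
     * then 8 Cb and 8 Cr (see backup_mb_border), so top_border - 32 is the
     * left neighbour's entry, which provides the top-left samples. */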
    src_y  -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a, b, xchg)                                                      \
    do {                                                                      \
        if (xchg)                                                             \
            AV_SWAP64(b, a);                                                  \
        else                                                                  \
            AV_COPY64(b, a);                                                  \
    } while (0)

    XCHG(top_border_m1 + 8, src_y - 8, xchg);
    XCHG(top_border,        src_y,     xchg);
    XCHG(top_border + 8,    src_y + 8, 1);
    if (mb_x < mb_width - 1)
        XCHG(top_border + 32, src_y + 16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1 + 16, src_cb - 8, xchg);
        XCHG(top_border_m1 + 24, src_cr - 8, xchg);
        XCHG(top_border + 16, src_cb, 1);
        XCHG(top_border + 24, src_cr, 1);
    }
}
static av_always_inline
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x)
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
    else
        return mb_y ? mode : LEFT_DC_PRED8x8;
}

static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
{
    if (!mb_x)
        return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
    else
        return mb_y ? mode : HOR_PRED8x8;
}

static av_always_inline
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
{
    switch (mode) {
    case DC_PRED8x8:
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    case VERT_PRED8x8:
        return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
    case HOR_PRED8x8:
        return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
    case PLANE_PRED8x8: /* TM */
        return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
    }
    return mode;
}
static av_always_inline
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
{
    if (!mb_x)
        return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
    else
        return mb_y ? mode : HOR_VP8_PRED;
}

static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
                                     int *copy_buf, int vp7)
{
    switch (mode) {
    case VERT_PRED:
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
    case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
                   * as 16x16/8x8 DC */
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y && !mb_x)
            *copy_buf = 1;
        return mode;
    }
    return mode;
}
static av_always_inline
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
{
    int x, y, mode, nnz;
    uint32_t tr;

    /* for the first row, we need to run xchg_mb_border to init the top edge
     * to 127; otherwise, skip it if we aren't going to deblock */
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        const uint8_t lo = is_vp7 ? 128 : 127;
        const uint8_t hi = is_vp7 ? 128 : 129;
        uint8_t tr_top[4] = { lo, lo, lo, lo };

        // all blocks on the right edge of the macroblock use the bottom edge
        // of the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (mb_y && mb_x == s->mb_width - 1) {
            tr       = tr_right[-1] * 0x01010101u;
            tr_right = (uint8_t *) &tr;
        }

        if (mb->skip)
            AV_ZERO128(td->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr + 4 * x;
                LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);
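
                /* copy_dst is a small 8x5 staging area: row 0 holds the
                 * top-left sample at offset 3 and the top edge at 4..7,
                 * offsets 11/19/27/35 hold the left column, and the 4x4
                 * block itself is predicted at offset 12 with stride 8. */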
                if ((y == 0 || x == 3) && mb_y == 0) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
                                                        mb_y + y, &copy, is_vp7);
                if (copy) {
                    dst      = copy_dst + 12;
                    linesize = 8;
                    if (!(mb_y + y)) {
                        copy_dst[3] = lo;
                        AV_WN32A(copy_dst + 4, lo * 0x01010101U);
                    } else {
                        AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
                        if (!(mb_x + x))
                            copy_dst[3] = hi;
                        else
                            copy_dst[3] = ptr[4 * x - s->linesize - 1];
                    }
                    if (!(mb_x + x)) {
                        copy_dst[11] =
                        copy_dst[19] =
                        copy_dst[27] =
                        copy_dst[35] = hi;
                    } else {
                        copy_dst[11] = ptr[4 * x                   - 1];
                        copy_dst[19] = ptr[4 * x + s->linesize     - 1];
                        copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
                        copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
                    }
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    AV_COPY32(ptr + 4 * x,                   copy_dst + 12);
                    AV_COPY32(ptr + 4 * x + s->linesize,     copy_dst + 20);
                    AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
                    AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
                }

                nnz = td->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
                                                  td->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr + 4 * x,
                                               td->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr      += 4 * s->linesize;
            intra4x4 += 4;
        }
    }

    mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
                                            mb_x, mb_y, is_vp7);
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
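
/* Example: a subpel position with index 2 uses the full six-tap filter, so
 * edge emulation must fetch subpel_idx[0][2] = 2 extra pixels on the left
 * and subpel_idx[2][2] = 3 on the right, i.e. subpel_idx[1][2] = 5 in total. */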
/**
 * luma MC function
 *
 * @param s        VP8 decoding context
 * @param dst      target buffer for block data at block position
 * @param ref      reference picture buffer at origin (0, 0)
 * @param mv       motion vector (relative to block position) to get pixel data from
 * @param x_off    horizontal position of block from origin (0, 0)
 * @param y_off    vertical position of block from origin (0, 0)
 * @param block_w  width of block (16, 8 or 4)
 * @param block_h  height of block (always same as block_w)
 * @param width    width of src/dst plane data
 * @param height   height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
                 ThreadFrame *ref, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, ptrdiff_t linesize,
                 vp8_mc_func mc_func[3][3])
{
    uint8_t *src = ref->f->data[0];

    if (AV_RN32A(mv)) {
        int src_linesize = linesize;

        int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        // edge emulation
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx,
                                     width, height);
            src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            src_linesize = EDGE_EMU_LINESIZE;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
                      linesize, block_h, 0, 0);
    }
}
/**
 * chroma MC function
 *
 * @param s        VP8 decoding context
 * @param dst1     target buffer for block data at block position (U plane)
 * @param dst2     target buffer for block data at block position (V plane)
 * @param ref      reference picture buffer at origin (0, 0)
 * @param mv       motion vector (relative to block position) to get pixel data from
 * @param x_off    horizontal position of block from origin (0, 0)
 * @param y_off    vertical position of block from origin (0, 0)
 * @param block_w  width of block (16, 8 or 4)
 * @param block_h  height of block (always same as block_w)
 * @param width    width of src/dst plane data
 * @param height   height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
                   uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
                   int x_off, int y_off, int block_w, int block_h,
                   int width, int height, ptrdiff_t linesize,
                   vp8_mc_func mc_func[3][3])
{
    uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];

    if (AV_RN32A(mv)) {
        int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
        int my = mv->y & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src1 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);

            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src2 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}
static av_always_inline
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                 ThreadFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off, int block_w, int block_h,
                 int width, int height, VP56mv *mv)
{
    VP56mv uvmv = *mv;

    /* Y */
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
                ref_frame, mv, x_off + bx_off, y_off + by_off,
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);

    /* U/V */
    if (s->profile == 3) {
        /* this block only applies to VP8; it is safe to check
         * only the profile, as VP7 profile <= 1 */
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    x_off   >>= 1;
    y_off   >>= 1;
    bx_off  >>= 1;
    by_off  >>= 1;
    width   >>= 1;
    height  >>= 1;
    block_w >>= 1;
    block_h >>= 1;
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
}
/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
static av_always_inline
void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                     int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4;
        int mx = (mb->mv.x >> 2) + x_off + 8;
        int my = (mb->mv.y >> 2) + y_off;
        uint8_t **src = s->framep[ref]->tf.f->data;
        int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
        s->vdsp.prefetch(src[0] + off, s->linesize, 4);
        off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
        s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
    }
}
/**
 * Apply motion vectors to prediction buffer, chapter 18.
 */
static av_always_inline
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16 * s->mb_width, height = 16 * s->mb_height;
    ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
        break;
    case VP8_SPLITMVMODE_4x4: {
        int x, y;
        VP56mv uvmv;

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
                            ref, &bmv[4 * y + x],
                            4 * x + x_off, 4 * y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
            }
        }

        /* U/V */
        x_off  >>= 1;
        y_off  >>= 1;
        width  >>= 1;
        height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[2 * y       * 4 + 2 * x    ].x +
                         mb->bmv[2 * y       * 4 + 2 * x + 1].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
                uvmv.y = mb->bmv[2 * y       * 4 + 2 * x    ].y +
                         mb->bmv[2 * y       * 4 + 2 * x + 1].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
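                /* Rounded average of the four covered luma MVs: FF_SIGNBIT
                 * adds one more for negative sums to compensate the floor
                 * behaviour of the arithmetic shift, so the divide by 4
                 * rounds to nearest. */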
                uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
                uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
                if (s->profile == 3) {
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
                vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
                              dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
                              &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
            }
        }
        break;
    }
    case VP8_SPLITMVMODE_16x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x16:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
        break;
    }
}
static av_always_inline
void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
            if (nnz4) {
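                /* Each byte of nnz4 is one sub-block's coefficient count:
                 * if every byte is 0 or 1 only DC terms are present and the
                 * whole row can take the dc_add4y fast path; any byte >= 2
                 * forces per-block IDCTs. */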
                if (nnz4 & ~0x01010101) {
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t) nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
                                                      td->block[y][x],
                                                      s->linesize);
                        else if ((uint8_t) nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
                                                   td->block[y][x],
                                                   s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;
                    }
                } else {
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
                }
            }
            y_dst += 4 * s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1 + ch];
            if (nnz4 & ~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t) nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
                                                      td->block[4 + ch][(y << 1) + x],
                                                      s->uvlinesize);
                        else if ((uint8_t) nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
                                                   td->block[4 + ch][(y << 1) + x],
                                                   s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            goto chroma_idct_end;
                    }
                    ch_dst += 4 * s->uvlinesize;
                }
            } else {
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
            }
        }
chroma_idct_end:
        ;
    }
}
static av_always_inline
void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
                         VP8FilterStrength *f, int is_vp7)
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[mb->segment];
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];
        filter_level += s->lf_delta.mode[mb->mode];
    }

    filter_level = av_clip_uintp2(filter_level, 6);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
        interior_limit   = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit  = interior_limit;
    f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
                      mb->mode == VP8_MVMODE_SPLIT;
}
static av_always_inline
void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
               int mb_x, int mb_y, int is_vp7)
{
    int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize     = s->linesize;
    int uvlinesize   = s->uvlinesize;
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };

    if (!filter_level)
        return;

    if (is_vp7) {
        bedge_lim_y  = filter_level;
        bedge_lim_uv = filter_level * 2;
        mbedge_lim   = filter_level + 2;
    } else {
        bedge_lim_y  =
        bedge_lim_uv = filter_level * 2 + inner_limit;
        mbedge_lim   = bedge_lim_y + 4;
    }

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
2108 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2109 mbedge_lim, inner_limit, hev_thresh);
2110 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2111 mbedge_lim, inner_limit, hev_thresh);
2114 #define H_LOOP_FILTER_16Y_INNER(cond) \
2115 if (cond && inner_filter) { \
2116 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2117 bedge_lim_y, inner_limit, \
2119 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2120 bedge_lim_y, inner_limit, \
2122 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2123 bedge_lim_y, inner_limit, \
2125 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2126 uvlinesize, bedge_lim_uv, \
2127 inner_limit, hev_thresh); \
2130 H_LOOP_FILTER_16Y_INNER(!is_vp7)
2133 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2134 mbedge_lim, inner_limit, hev_thresh);
2135 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2136 mbedge_lim, inner_limit, hev_thresh);
2140 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
2141 linesize, bedge_lim_y,
2142 inner_limit, hev_thresh);
2143 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
2144 linesize, bedge_lim_y,
2145 inner_limit, hev_thresh);
2146 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2147 linesize, bedge_lim_y,
2148 inner_limit, hev_thresh);
2149 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2150 dst[2] + 4 * uvlinesize,
2151 uvlinesize, bedge_lim_uv,
2152 inner_limit, hev_thresh);
2155 H_LOOP_FILTER_16Y_INNER(is_vp7)
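/* Annotation (not part of the upstream file): three thresholds drive the
 * normal loop filter. mbedge_lim applies to the macroblock edge, bedge_lim_*
 * to the interior 4-pixel block edges, and hev_thresh flags "high edge
 * variance" pixels that receive the stronger filter. The LUT above merely
 * buckets filter_level, with inter frames (row 0) using higher thresholds
 * than keyframes (row 1): e.g. filter_level = 20 gives hev_thresh = 2 on an
 * inter frame but 1 on a keyframe. */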
2158 static av_always_inline
2159 void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2160 int mb_x, int mb_y)
2161 {
2162 int mbedge_lim, bedge_lim;
2163 int filter_level = f->filter_level;
2164 int inner_limit = f->inner_limit;
2165 int inner_filter = f->inner_filter;
2166 int linesize = s->linesize;
2168 if (!filter_level)
2169 return;
2171 bedge_lim = 2 * filter_level + inner_limit;
2172 mbedge_lim = bedge_lim + 4;
2174 if (mb_x)
2175 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2176 if (inner_filter) {
2177 s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2178 s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2179 s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2180 }
2182 if (mb_y)
2183 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2184 if (inner_filter) {
2185 s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2186 s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2187 s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
2188 }
2189 }
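/* Annotation (not part of the upstream file): the "simple" filter variant
 * only ever touches luma (dst is the Y plane alone, see the calls in
 * filter_mb_row() below) and replaces the inner-limit/hev machinery with a
 * single edge limit per edge class. */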
2191 #define MARGIN (16 << 2)
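/* Annotation (not part of the upstream file): motion vectors are stored in
 * quarter-pel units, so one 16-pixel macroblock spans 16 << 2 = 64 units;
 * that is both the value of MARGIN and the "<< 6" scale used for the clamp
 * windows below. Example for a 640-pixel-wide frame (mb_width = 40):
 *
 *     s->mv_min.x = -64;
 *     s->mv_max.x = (39 << 6) + 64;   // = 2560
 *
 * i.e. vectors may point at most one macroblock outside the frame, which the
 * edge-emulation path of the MC code still handles; the window then slides
 * by 64 units per macroblock as decoding advances. */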
2192 static av_always_inline
2193 void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2194 VP8Frame *prev_frame, int is_vp7)
2195 {
2196 VP8Context *s = avctx->priv_data;
2197 int mb_x, mb_y;
2199 s->mv_min.y = -MARGIN;
2200 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2201 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
2202 VP8Macroblock *mb = s->macroblocks_base +
2203 ((s->mb_width + 1) * (mb_y + 1) + 1);
2204 int mb_xy = mb_y * s->mb_width;
2206 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2208 s->mv_min.x = -MARGIN;
2209 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2210 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2211 if (mb_y == 0)
2212 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2213 DC_PRED * 0x01010101);
2214 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2215 prev_frame && prev_frame->seg_map ?
2216 prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
2217 s->mv_min.x -= 64;
2218 s->mv_max.x -= 64;
2219 }
2220 s->mv_min.y -= 64;
2221 s->mv_max.y -= 64;
2222 }
2223 }
2225 static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2226 VP8Frame *prev_frame)
2227 {
2228 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
2229 }
2231 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2232 VP8Frame *prev_frame)
2233 {
2234 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
2235 }
2237 #if HAVE_THREADS
2238 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
2239 do { \
2240 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
2241 if (otd->thread_mb_pos < tmp) { \
2242 pthread_mutex_lock(&otd->lock); \
2243 td->wait_mb_pos = tmp; \
2244 do { \
2245 if (otd->thread_mb_pos >= tmp) \
2246 break; \
2247 pthread_cond_wait(&otd->cond, &otd->lock); \
2248 } while (1); \
2249 td->wait_mb_pos = INT_MAX; \
2250 pthread_mutex_unlock(&otd->lock); \
2251 } \
2252 } while (0)
2254 #define update_pos(td, mb_y, mb_x) \
2255 do { \
2256 int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
2257 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2258 (num_jobs > 1); \
2259 int is_null = !next_td || !prev_td; \
2260 int pos_check = (is_null) ? 1 \
2261 : (next_td != td && \
2262 pos >= next_td->wait_mb_pos) || \
2263 (prev_td != td && \
2264 pos >= prev_td->wait_mb_pos); \
2265 td->thread_mb_pos = pos; \
2266 if (sliced_threading && pos_check) { \
2267 pthread_mutex_lock(&td->lock); \
2268 pthread_cond_broadcast(&td->cond); \
2269 pthread_mutex_unlock(&td->lock); \
2270 } \
2271 } while (0)
2272 #else
2273 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0)
2274 #define update_pos(td, mb_y, mb_x) while(0)
2275 #endif
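/* Annotation (not part of the upstream file): a progress position packs the
 * row into the upper half-word and the column into the lower one, so one
 * integer comparison orders (mb_y, mb_x) pairs lexicographically:
 *
 *     (2 << 16) | 40  <  (3 << 16) | 10  <  (3 << 16) | 12
 *
 * check_thread_pos() blocks until the other thread's published position has
 * passed the requested one; update_pos() publishes our position and wakes
 * any waiter, taking the lock only when sliced threading is active and
 * somebody may actually be waiting. The no-threads stubs expand to
 * "while(0)", which harmlessly absorbs the semicolon at each call site. */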
2277 static av_always_inline void decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2278 int jobnr, int threadnr, int is_vp7)
2279 {
2280 VP8Context *s = avctx->priv_data;
2281 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
2282 int mb_y = td->thread_mb_pos >> 16;
2283 int mb_x, mb_xy = mb_y * s->mb_width;
2284 int num_jobs = s->num_jobs;
2285 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
2286 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
2287 VP8Macroblock *mb;
2288 uint8_t *dst[3] = {
2289 curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2290 curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
2291 curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
2292 };
2293 if (mb_y == 0)
2294 prev_td = td;
2295 else
2296 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2297 if (mb_y == s->mb_height - 1)
2298 next_td = td;
2299 else
2300 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2301 if (s->mb_layout == 1)
2302 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2303 else {
2304 // Make sure the previous frame has read its segmentation map,
2305 // if we re-use the same map.
2306 if (prev_frame && s->segmentation.enabled &&
2307 !s->segmentation.update_map)
2308 ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
2309 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2310 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2311 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2312 }
2314 if (!is_vp7 || mb_y == 0)
2315 memset(td->left_nnz, 0, sizeof(td->left_nnz));
2317 s->mv_min.x = -MARGIN;
2318 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2320 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2321 // Wait for previous thread to read mb_x+2, and reach mb_y-1.
2322 if (prev_td != td) {
2323 if (threadnr != 0) {
2324 check_thread_pos(td, prev_td,
2325 mb_x + (is_vp7 ? 2 : 1),
2326 mb_y - (is_vp7 ? 2 : 1));
2327 } else {
2328 check_thread_pos(td, prev_td,
2329 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2330 mb_y - (is_vp7 ? 2 : 1));
2331 }
2332 }
2334 s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2335 s->linesize, 4);
2336 s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2337 dst[2] - dst[1], 2);
2339 if (!s->mb_layout)
2340 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2341 prev_frame && prev_frame->seg_map ?
2342 prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2344 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2346 if (!mb->skip)
2347 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
2349 if (mb->mode <= MODE_I4x4)
2350 intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2351 else
2352 inter_predict(s, td, dst, mb, mb_x, mb_y);
2354 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2356 if (!mb->skip) {
2357 idct_mb(s, td, dst, mb);
2358 } else {
2359 AV_ZERO64(td->left_nnz);
2360 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2362 /* Reset DC block predictors if they would exist
2363 * if the mb had coefficients */
2364 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2365 td->left_nnz[8] = 0;
2366 s->top_nnz[mb_x][8] = 0;
2367 }
2368 }
2370 if (s->deblock_filter)
2371 filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
2373 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
2374 if (s->filter.simple)
2375 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2376 NULL, NULL, s->linesize, 0, 1);
2377 else
2378 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2379 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2380 }
2382 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
2384 dst[0] += 16;
2385 dst[1] += 8;
2386 dst[2] += 8;
2387 s->mv_min.x -= 64;
2388 s->mv_max.x -= 64;
2390 if (mb_x == s->mb_width + 1) {
2391 update_pos(td, mb_y, s->mb_width + 3);
2392 } else {
2393 update_pos(td, mb_y, mb_x);
2394 }
2395 }
2396 }
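/* Annotation (not part of the upstream file): before decoding macroblock
 * (mb_x, mb_y) a worker waits until the thread owning an earlier row has
 * published enough progress that the top/top-right neighbours it predicts
 * from are final; the required (row, column) lead differs between VP7 and
 * VP8, hence the (is_vp7 ? 2 : 1) terms. Thread 0 compares against positions
 * offset by s->mb_width + 3, i.e. against the deblocking pass, whose
 * update_pos() calls in filter_mb_row() publish columns shifted by that same
 * amount. */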
2398 static void vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2399 int jobnr, int threadnr)
2400 {
2401 decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
2402 }
2404 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2405 int jobnr, int threadnr)
2406 {
2407 decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
2408 }
2410 static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
2411 int jobnr, int threadnr, int is_vp7)
2412 {
2413 VP8Context *s = avctx->priv_data;
2414 VP8ThreadData *td = &s->thread_data[threadnr];
2415 int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
2416 AVFrame *curframe = s->curframe->tf.f;
2417 VP8Macroblock *mb;
2418 VP8ThreadData *prev_td, *next_td;
2419 uint8_t *dst[3] = {
2420 curframe->data[0] + 16 * mb_y * s->linesize,
2421 curframe->data[1] + 8 * mb_y * s->uvlinesize,
2422 curframe->data[2] + 8 * mb_y * s->uvlinesize
2423 };
2425 if (s->mb_layout == 1)
2426 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2427 else
2428 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2430 if (mb_y == 0)
2431 prev_td = td;
2432 else
2433 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2434 if (mb_y == s->mb_height - 1)
2435 next_td = td;
2436 else
2437 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2439 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
2440 VP8FilterStrength *f = &td->filter_strength[mb_x];
2441 if (prev_td != td)
2442 check_thread_pos(td, prev_td,
2443 (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
2444 if (next_td != td)
2445 if (next_td != &s->thread_data[0])
2446 check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
2448 if (num_jobs == 1) {
2449 if (s->filter.simple)
2450 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2451 NULL, NULL, s->linesize, 0, 1);
2452 else
2453 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2454 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2455 }
2457 if (s->filter.simple)
2458 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2459 else
2460 filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
2461 dst[0] += 16;
2462 dst[1] += 8;
2463 dst[2] += 8;
2465 update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
2466 }
2467 }
2469 static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
2470 int jobnr, int threadnr)
2471 {
2472 filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
2473 }
2475 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
2476 int jobnr, int threadnr)
2477 {
2478 filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
2479 }
2481 static av_always_inline
2482 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2483 int threadnr, int is_vp7)
2484 {
2485 VP8Context *s = avctx->priv_data;
2486 VP8ThreadData *td = &s->thread_data[jobnr];
2487 VP8ThreadData *next_td = NULL, *prev_td = NULL;
2488 VP8Frame *curframe = s->curframe;
2489 int mb_y, num_jobs = s->num_jobs;
2491 td->thread_nr = threadnr;
2492 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
2493 if (mb_y >= s->mb_height)
2494 break;
2495 td->thread_mb_pos = mb_y << 16;
2496 s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
2497 if (s->deblock_filter)
2498 s->filter_mb_row(avctx, tdata, jobnr, threadnr);
2499 update_pos(td, mb_y, INT_MAX & 0xFFFF);
2501 s->mv_min.y -= 64;
2502 s->mv_max.y -= 64;
2504 if (avctx->active_thread_type == FF_THREAD_FRAME)
2505 ff_thread_report_progress(&curframe->tf, mb_y, 0);
2506 }
2508 return 0;
2509 }
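/* Annotation (not part of the upstream file): rows are striped over the
 * jobs, so with num_jobs = 4 the worker with jobnr = 1 handles rows
 * 1, 5, 9, ... Each row is entropy-decoded and reconstructed, then
 * optionally deblocked, before the worker publishes "row complete" via
 * update_pos(td, mb_y, 0xFFFF) (INT_MAX & 0xFFFF). Under frame threading a
 * single job runs instead, and per-row progress is reported so the next
 * frame, which may reference this one, can start decoding early. */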
2511 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2512 int jobnr, int threadnr)
2513 {
2514 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
2515 }
2517 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2518 int jobnr, int threadnr)
2519 {
2520 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
2521 }
2524 static av_always_inline
2525 int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2526 AVPacket *avpkt, int is_vp7)
2527 {
2528 VP8Context *s = avctx->priv_data;
2529 int ret, i, referenced, num_jobs;
2530 enum AVDiscard skip_thresh;
2531 VP8Frame *av_uninit(curframe), *prev_frame;
2533 if (is_vp7)
2534 ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2535 else
2536 ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
2538 if (ret < 0)
2539 goto err;
2541 prev_frame = s->framep[VP56_FRAME_CURRENT];
2543 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2544 s->update_altref == VP56_FRAME_CURRENT;
2546 skip_thresh = !referenced ? AVDISCARD_NONREF
2547 : !s->keyframe ? AVDISCARD_NONKEY
2548 : AVDISCARD_ALL;
2550 if (avctx->skip_frame >= skip_thresh) {
2551 s->invisible = 1;
2552 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2553 goto skip_decode;
2554 }
2555 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2557 // release no longer referenced frames
2558 for (i = 0; i < 5; i++)
2559 if (s->frames[i].tf.f->data[0] &&
2560 &s->frames[i] != prev_frame &&
2561 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2562 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
2563 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2564 vp8_release_frame(s, &s->frames[i]);
2566 curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
2568 if (!s->colorspace)
2569 avctx->colorspace = AVCOL_SPC_BT470BG;
2570 if (s->fullrange)
2571 avctx->color_range = AVCOL_RANGE_JPEG;
2572 else
2573 avctx->color_range = AVCOL_RANGE_MPEG;
2575 /* Given that arithmetic probabilities are updated every frame, it's quite
2576 * likely that the values we have on a random interframe are complete
2577 * junk if we didn't start decode on a keyframe. So just don't display
2578 * anything rather than junk. */
2579 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2580 !s->framep[VP56_FRAME_GOLDEN] ||
2581 !s->framep[VP56_FRAME_GOLDEN2])) {
2582 av_log(avctx, AV_LOG_WARNING,
2583 "Discarding interframe without a prior keyframe!\n");
2584 ret = AVERROR_INVALIDDATA;
2585 goto err;
2586 }
2588 curframe->tf.f->key_frame = s->keyframe;
2589 curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2590 : AV_PICTURE_TYPE_P;
2591 if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
2592 goto err;
2594 // check if golden and altref are swapped
2595 if (s->update_altref != VP56_FRAME_NONE)
2596 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2597 else
2598 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2600 if (s->update_golden != VP56_FRAME_NONE)
2601 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2602 else
2603 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
2605 if (s->update_last)
2606 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2607 else
2608 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2610 s->next_framep[VP56_FRAME_CURRENT] = curframe;
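/* Annotation (not part of the upstream file): reference rotation. The
 * update_* fields name the slot whose current frame becomes the new golden /
 * altref; VP56_FRAME_CURRENT means "the frame decoded right now". A keyframe
 * therefore routes curframe into all of last, golden and altref, while e.g.
 * update_golden == VP56_FRAME_PREVIOUS copies the old last-frame into the
 * golden slot. The new set is staged in next_framep[] and only committed to
 * framep[] once the frame has decoded successfully, so a decode error leaves
 * the previous reference set intact. */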
2612 if (avctx->codec->update_thread_context)
2613 ff_thread_finish_setup(avctx);
2615 s->linesize = curframe->tf.f->linesize[0];
2616 s->uvlinesize = curframe->tf.f->linesize[1];
2618 memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2619 /* Zero macroblock structures for top/top-left prediction
2620 * from outside the frame. */
2621 if (!s->mb_layout)
2622 memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2623 (s->mb_width + 1) * sizeof(*s->macroblocks));
2624 if (!s->mb_layout && s->keyframe)
2625 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2627 memset(s->ref_count, 0, sizeof(s->ref_count));
2629 if (s->mb_layout == 1) {
2630 // Make sure the previous frame has read its segmentation map,
2631 // if we re-use the same map.
2632 if (prev_frame && s->segmentation.enabled &&
2633 !s->segmentation.update_map)
2634 ff_thread_await_progress(&prev_frame->tf, 1, 0);
2635 if (is_vp7)
2636 vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2637 else
2638 vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2639 }
2641 if (avctx->active_thread_type == FF_THREAD_FRAME)
2642 num_jobs = 1;
2643 else
2644 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2645 s->num_jobs = num_jobs;
2646 s->curframe = curframe;
2647 s->prev_frame = prev_frame;
2648 s->mv_min.y = -MARGIN;
2649 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2650 for (i = 0; i < MAX_THREADS; i++) {
2651 s->thread_data[i].thread_mb_pos = 0;
2652 s->thread_data[i].wait_mb_pos = INT_MAX;
2653 }
2654 if (is_vp7)
2655 avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2656 num_jobs);
2657 else
2658 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
2659 num_jobs);
2661 ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
2662 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2664 skip_decode:
2665 // if future frames don't use the updated probabilities,
2666 // reset them to the values we saved
2667 if (!s->update_probabilities)
2668 s->prob[0] = s->prob[1];
2670 if (!s->invisible) {
2671 if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
2672 return ret;
2673 *got_frame = 1;
2674 }
2676 return avpkt->size;
2677 err:
2678 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2679 return ret;
2680 }
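/* Annotation (not part of the upstream file): "invisible" frames are either
 * altref-update frames whose show_frame flag is unset or frames dropped via
 * avctx->skip_frame. They still rotate the reference slots above, but no
 * picture is returned: *got_frame stays 0 while the packet still counts as
 * consumed. */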
2682 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2683 AVPacket *avpkt)
2684 {
2685 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
2686 }
2688 #if CONFIG_VP7_DECODER
2689 static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2690 AVPacket *avpkt)
2691 {
2692 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
2693 }
2694 #endif /* CONFIG_VP7_DECODER */
2696 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2697 {
2698 VP8Context *s = avctx->priv_data;
2699 int i;
2701 if (!s)
2702 return 0;
2704 vp8_decode_flush_impl(avctx, 1);
2705 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2706 av_frame_free(&s->frames[i].tf.f);
2708 return 0;
2709 }
2711 static av_cold int vp8_init_frames(VP8Context *s)
2712 {
2713 int i;
2714 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2715 s->frames[i].tf.f = av_frame_alloc();
2716 if (!s->frames[i].tf.f)
2717 return AVERROR(ENOMEM);
2718 }
2719 return 0;
2720 }
2722 static av_always_inline
2723 int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2724 {
2725 VP8Context *s = avctx->priv_data;
2726 int ret;
2728 s->avctx = avctx;
2729 s->vp7 = avctx->codec->id == AV_CODEC_ID_VP7;
2730 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2731 avctx->internal->allocate_progress = 1;
2733 ff_videodsp_init(&s->vdsp, 8);
2735 ff_vp78dsp_init(&s->vp8dsp);
2736 if (CONFIG_VP7_DECODER && is_vp7) {
2737 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2738 ff_vp7dsp_init(&s->vp8dsp);
2739 s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
2740 s->filter_mb_row = vp7_filter_mb_row;
2741 } else if (CONFIG_VP8_DECODER && !is_vp7) {
2742 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2743 ff_vp8dsp_init(&s->vp8dsp);
2744 s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
2745 s->filter_mb_row = vp8_filter_mb_row;
2746 }
2748 /* does not change for VP8 */
2749 memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
2751 if ((ret = vp8_init_frames(s)) < 0) {
2752 ff_vp8_decode_free(avctx);
2753 return ret;
2754 }
2756 return 0;
2757 }
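/* Annotation (not part of the upstream file): VP7 and VP8 share this context
 * and decode loop; the codec-specific pieces are isolated behind the two
 * function pointers assigned above plus the is_vp7 flag threaded through the
 * av_always_inline helpers, which the compiler folds into separately
 * specialized VP7 and VP8 code paths at build time. */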
2759 #if CONFIG_VP7_DECODER
2760 static int vp7_decode_init(AVCodecContext *avctx)
2761 {
2762 return vp78_decode_init(avctx, IS_VP7);
2763 }
2764 #endif /* CONFIG_VP7_DECODER */
2766 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2767 {
2768 return vp78_decode_init(avctx, IS_VP8);
2769 }
2771 #if CONFIG_VP8_DECODER
2772 #if HAVE_THREADS
2773 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2774 {
2775 VP8Context *s = avctx->priv_data;
2776 int ret;
2778 s->avctx = avctx;
2780 if ((ret = vp8_init_frames(s)) < 0) {
2781 ff_vp8_decode_free(avctx);
2782 return ret;
2783 }
2785 return 0;
2786 }
2788 #define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
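/* Annotation (not part of the upstream file): with frame threading each
 * worker owns a private VP8Context with its own frames[] array. REBASE()
 * translates a frame pointer valid in the source context into the pointer of
 * the same slot in the destination context: if pic == &s_src->frames[2] it
 * yields &s->frames[2], and NULL is passed through unchanged. */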
2790 static int vp8_decode_update_thread_context(AVCodecContext *dst,
2791 const AVCodecContext *src)
2792 {
2793 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2794 int i;
2796 if (s->macroblocks_base &&
2797 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2798 free_buffers(s);
2799 s->mb_width = s_src->mb_width;
2800 s->mb_height = s_src->mb_height;
2801 }
2803 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2804 s->segmentation = s_src->segmentation;
2805 s->lf_delta = s_src->lf_delta;
2806 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2808 for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2809 if (s_src->frames[i].tf.f->data[0]) {
2810 int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
2811 if (ret < 0)
2812 return ret;
2813 }
2814 }
2816 s->framep[0] = REBASE(s_src->next_framep[0]);
2817 s->framep[1] = REBASE(s_src->next_framep[1]);
2818 s->framep[2] = REBASE(s_src->next_framep[2]);
2819 s->framep[3] = REBASE(s_src->next_framep[3]);
2821 return 0;
2822 }
2824 #endif /* CONFIG_VP8_DECODER */
2826 #if CONFIG_VP7_DECODER
2827 AVCodec ff_vp7_decoder = {
2828 .name = "vp7",
2829 .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"),
2830 .type = AVMEDIA_TYPE_VIDEO,
2831 .id = AV_CODEC_ID_VP7,
2832 .priv_data_size = sizeof(VP8Context),
2833 .init = vp7_decode_init,
2834 .close = ff_vp8_decode_free,
2835 .decode = vp7_decode_frame,
2836 .capabilities = AV_CODEC_CAP_DR1,
2837 .flush = vp8_decode_flush,
2838 };
2839 #endif /* CONFIG_VP7_DECODER */
2841 #if CONFIG_VP8_DECODER
2842 AVCodec ff_vp8_decoder = {
2843 .name = "vp8",
2844 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2845 .type = AVMEDIA_TYPE_VIDEO,
2846 .id = AV_CODEC_ID_VP8,
2847 .priv_data_size = sizeof(VP8Context),
2848 .init = ff_vp8_decode_init,
2849 .close = ff_vp8_decode_free,
2850 .decode = ff_vp8_decode_frame,
2851 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
2852 AV_CODEC_CAP_SLICE_THREADS,
2853 .flush = vp8_decode_flush,
2854 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2855 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2856 };
2857 #endif /* CONFIG_VP8_DECODER */