/*
 * VP7/VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Fiona Glaser
 * Copyright (C) 2012 Daniel Kang
 * Copyright (C) 2014 Peter Ross
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/imgutils.h"

#include "rectangle.h"

#if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
#define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
#elif CONFIG_VP7_DECODER
#define VPX(vp7, f) vp7_ ## f
#else // CONFIG_VP8_DECODER
#define VPX(vp7, f) vp8_ ## f
#endif
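
/* For illustration: VPX() resolves the codec-specific variant of a symbol.
 * With both decoders built, VPX(1, update_dimensions) expands to
 * vp7_update_dimensions and VPX(0, update_dimensions) to
 * vp8_update_dimensions; with only one decoder enabled, the preprocessor
 * picks the single available variant and no runtime branch remains. */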
static void free_buffers(VP8Context *s)
    for (i = 0; i < MAX_THREADS; i++) {
        pthread_cond_destroy(&s->thread_data[i].cond);
        pthread_mutex_destroy(&s->thread_data[i].lock);
        av_freep(&s->thread_data[i].filter_strength);
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;

static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
    if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                    ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
        ff_thread_release_buffer(s->avctx, &f->tf);
        return AVERROR(ENOMEM);

static void vp8_release_frame(VP8Context *s, VP8Frame *f)
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);

#if CONFIG_VP8_DECODER
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
#endif /* CONFIG_VP8_DECODER */

static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
    VP8Context *s = avctx->priv_data;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(s, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

static void vp8_decode_flush(AVCodecContext *avctx)
    vp8_decode_flush_impl(avctx, 0);

static VP8Frame *vp8_find_free_buffer(VP8Context *s)
    VP8Frame *frame = NULL;

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT]  &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            frame = &s->frames[i];
    av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");

    if (frame->tf.f->data[0])
        vp8_release_frame(s, frame);

static av_always_inline
int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
    AVCodecContext *avctx = s->avctx;

    if (width != s->avctx->width || ((width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) && s->macroblocks_base ||
        height != s->avctx->height) {
        vp8_decode_flush_impl(s->avctx, 1);

        ret = ff_set_dimensions(s->avctx, width, height);

    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
    s->mb_height = (s->avctx->coded_height + 15) / 16;

    s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
                   FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1;
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
                                               sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
    } else // Sliced threading
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
                                         sizeof(*s->macroblocks));
    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));

    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
        return AVERROR(ENOMEM);

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength =
            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
        if (!s->thread_data[i].filter_strength) {
            return AVERROR(ENOMEM);
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);

    s->macroblocks = s->macroblocks_base + 1;

static int vp7_update_dimensions(VP8Context *s, int width, int height)
    return update_dimensions(s, width, height, IS_VP7);

static int vp8_update_dimensions(VP8Context *s, int width, int height)
    return update_dimensions(s, width, height, IS_VP8);

static void parse_segment_info(VP8Context *s)
    VP56RangeCoder *c = &s->c;

    s->segmentation.update_map = vp8_rac_get(c);
    if (vp8_rac_get(c)) { // update segment feature data
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);

    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;

static void update_lf_deltas(VP8Context *s)
    VP56RangeCoder *c = &s->c;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];

static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
    const uint8_t *sizes = buf;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    buf      += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);

    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
        int size = AV_RL24(sizes + 3 * i);
        if (buf_size - size < 0)

        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

static void vp7_get_quants(VP8Context *s)
    VP56RangeCoder *c = &s->c;

    int yac_qi  = vp8_rac_get_uint(c, 7);
    int ydc_qi  = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;

    s->qmat[0].luma_qmul[0]    = vp7_ydc_qlookup[ydc_qi];
    s->qmat[0].luma_qmul[1]    = vp7_yac_qlookup[yac_qi];
    s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
    s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
    s->qmat[0].chroma_qmul[0]  = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
    s->qmat[0].chroma_qmul[1]  = vp7_yac_qlookup[uvac_qi];

static void vp8_get_quants(VP8Context *s)
    VP56RangeCoder *c = &s->c;

    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)

        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta,  7)];
        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,              7)];
        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)] * 2;
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] * 101581 >> 16;
        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];

        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
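
/* Worked check of the fixed-point constant used above: the Y2 AC dequant
 * factor is scaled by 155/100, and 155/100 * 65536 = 101580.8, so
 * "* 101581 >> 16" is the nearest 16.16 fixed-point approximation. For an AC
 * lookup value of 100, 100 * 101581 >> 16 = 155; the FFMAX() above then
 * enforces the minimum of 8. */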
/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does.
 *
 * Intra frames update all 3 references.
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set.
 * If the update (golden|altref) flag is set, it's updated with the current frame
 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 *     1: VP56_FRAME_PREVIOUS
 *     2: update golden with altref, or update altref with golden
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
    VP56RangeCoder *c = &s->c;

        return VP56_FRAME_CURRENT;

    switch (vp8_rac_get_uint(c, 2)) {
        return VP56_FRAME_PREVIOUS;
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;

    return VP56_FRAME_NONE;
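
/* Example of the mapping above: on an inter frame where the update-golden
 * flag is unset, reading the value 2 with ref == VP56_FRAME_GOLDEN returns
 * VP56_FRAME_GOLDEN2, i.e. "refresh golden from the current altref"; the
 * symmetric call for the altref slot returns VP56_FRAME_GOLDEN. */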
static void vp78_reset_probability_tables(VP8Context *s)
    for (i = 0; i < 4; i++)
        for (j = 0; j < 16; j++)
            memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                   sizeof(s->prob->token[i][j]));

static void vp78_update_probability_tables(VP8Context *s)
    VP56RangeCoder *c = &s->c;

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS - 1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;

#define VP7_MVC_SIZE 17
#define VP8_MVC_SIZE 19

static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
    VP56RangeCoder *c = &s->c;

    for (i = 0; i < 4; i++)
        s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);

    for (i = 0; i < 3; i++)
        s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);

    // 17.2 MV probability update
    for (i = 0; i < 2; i++)
        for (j = 0; j < mvc_size; j++)
            if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                s->prob->mvc[i][j] = vp8_rac_get_nn(c);

static void update_refs(VP8Context *s)
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);

static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
    for (j = 1; j < 3; j++) {
        for (i = 0; i < height / 2; i++)
            memcpy(dst->data[j] + i * dst->linesize[j],
                   src->data[j] + i * src->linesize[j], width / 2);

static void fade(uint8_t *dst, int dst_linesize,
                 const uint8_t *src, int src_linesize,
                 int width, int height,
    for (j = 0; j < height; j++) {
        for (i = 0; i < width; i++) {
            uint8_t y = src[j * src_linesize + i];
            dst[j * dst_linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
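
/* Worked example of the fade formula above (alpha and beta are the signed
 * 8-bit fade parameters read in vp7_fade_frame): a source sample y = 100
 * with beta = 64 and alpha = 10 becomes
 * clip(100 + (100 * 64 >> 8) + 10) = clip(100 + 25 + 10) = 135, i.e. a gain
 * of roughly 1 + beta/256 followed by an offset of alpha. */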
static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
    int alpha = (int8_t) vp8_rac_get_uint(c, 8);
    int beta  = (int8_t) vp8_rac_get_uint(c, 8);

    if (!s->keyframe && (alpha || beta)) {
        int width  = s->mb_width * 16;
        int height = s->mb_height * 16;

        if (!s->framep[VP56_FRAME_PREVIOUS] ||
            !s->framep[VP56_FRAME_GOLDEN]) {
            av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
            return AVERROR_INVALIDDATA;

        src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

        /* preserve the golden frame, write a new previous frame */
        if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
            s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
            if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)

            dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

            copy_chroma(dst, src, width, height);

        fade(dst->data[0], dst->linesize[0],
             src->data[0], src->linesize[0],
             width, height, alpha, beta);

static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
    VP56RangeCoder *c = &s->c;
    int part1_size, hscale, vscale, i, j, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    s->profile = (buf[0] >> 1) & 7;
    if (s->profile > 1) {
        avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
        return AVERROR_INVALIDDATA;

    s->keyframe = !(buf[0] & 1);

    part1_size = AV_RL24(buf) >> 4;

    if (buf_size < 4 - s->profile + part1_size) {
        av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, 4 - s->profile + part1_size);
        return AVERROR_INVALIDDATA;

    buf      += 4 - s->profile;
    buf_size -= 4 - s->profile;

    memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));

    ff_vp56_init_range_decoder(c, buf, part1_size);
    buf_size -= part1_size;

    /* A. Dimension information (keyframes only) */
        width  = vp8_rac_get_uint(c, 12);
        height = vp8_rac_get_uint(c, 12);
        hscale = vp8_rac_get_uint(c, 2);
        vscale = vp8_rac_get_uint(c, 2);
        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        for (i = 0; i < 2; i++)
            memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
                   sizeof(vp7_mv_default_prob[i]));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
        memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));

    if (s->keyframe || s->profile > 0)
        memset(s->inter_dc_pred, 0, sizeof(s->inter_dc_pred));

    /* B. Decoding information for all four macroblock-level features */
    for (i = 0; i < 4; i++) {
        s->feature_enabled[i] = vp8_rac_get(c);
        if (s->feature_enabled[i]) {
            s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);

            for (j = 0; j < 3; j++)
                s->feature_index_prob[i][j] =
                    vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;

            if (vp7_feature_value_size[s->profile][i])
                for (j = 0; j < 4; j++)
                    s->feature_value[i][j] =
                        vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;

    s->segmentation.enabled    = 0;
    s->segmentation.update_map = 0;
    s->lf_delta.enabled        = 0;

    s->num_coeff_partitions = 1;
    ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
        if ((ret = vp7_update_dimensions(s, width, height)) < 0)

    /* C. Dequantization indices */

    /* D. Golden frame update flag (a Flag) for interframes only */
        s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
        s->sign_bias[VP56_FRAME_GOLDEN] = 0;

    s->update_probabilities = 1;

    if (s->profile > 0) {
        s->update_probabilities = vp8_rac_get(c);
        if (!s->update_probabilities)
            s->prob[1] = s->prob[0];

        s->fade_present = vp8_rac_get(c);

    /* E. Fading information for previous frame */
    if (s->fade_present && vp8_rac_get(c)) {
        if ((ret = vp7_fade_frame(s, c)) < 0)

    /* F. Loop filter type */
        s->filter.simple = vp8_rac_get(c);

    /* G. DCT coefficient ordering specification */
        for (i = 1; i < 16; i++)
            s->prob[0].scan[i] = zigzag_scan[vp8_rac_get_uint(c, 4)];

    /* H. Loop filter levels */
        s->filter.simple = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
    vp78_update_probability_tables(s);

    s->mbskip_enabled = 0;

    /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
        s->prob->intra = vp8_rac_get_uint(c, 8);
        s->prob->last  = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);

static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    s->keyframe  = !(buf[0] & 1);
    s->profile   = (buf[0] >> 1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;

        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
               sizeof(s->put_pixels_tab));
    else // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
               sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7 * s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;

        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        width  = AV_RL16(buf + 3) & 0x3fff;
        height = AV_RL16(buf + 5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc, vp8_mv_default_prob,
               sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf_size -= header_size;

        s->colorspace = vp8_rac_get(c);
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        s->fullrange = vp8_rac_get(c);

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height)
        if ((ret = vp8_update_dimensions(s, width, height)) < 0)

        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    vp78_update_probability_tables(s);

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);

static av_always_inline
void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
    dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
                             av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
    dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
                             av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));

/**
 * Motion vector coding, 17.1.
 */
static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
    if (vp56_rac_get_prob_branchy(c, p[0])) {
        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = (vp7 ? 7 : 9); i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
        const uint8_t *ps = p + 2;
        bit = vp56_rac_get_prob(c, *ps);
        bit = vp56_rac_get_prob(c, *ps);
        x += vp56_rac_get_prob(c, *ps);

    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
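
/* Note on the long-vector branch above: the three low magnitude bits are read
 * first (LSB first), then the remaining bits from the top down to bit 4.
 * Bit 3 is handled last: when none of the higher bits (4 and up) are set it
 * must be 1, since magnitudes below 8 use the tree-coded short branch
 * instead; otherwise it is read with probability p[12]. The sign bit, p[1],
 * comes at the very end for both branches. */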
static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
    return read_mv_component(c, p, 1);

static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
    return read_mv_component(c, p, 0);

static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
        return vp7_submv_prob;

        return vp8_submv_prob[4 - !!left];
        return vp8_submv_prob[2];
    return vp8_submv_prob[1 - !!left];

/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                    int layout, int is_vp7)
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[-s->mb_width - 1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv       = top_mb->bmv;

    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
            part_idx = VP8_SPLITMVMODE_8x8;
        part_idx = VP8_SPLITMVMODE_4x4;

    num          = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx     = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        uint32_t left, above;
        const uint8_t *submv_prob;

            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above, is_vp7);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y +
                                   read_mv_component(c, s->prob->mvc[0], is_vp7);
                    mb->bmv[n].x = mb->mv.x +
                                   read_mv_component(c, s->prob->mvc[1], is_vp7);
                    AV_ZERO32(&mb->bmv[n]);
                AV_WN32A(&mb->bmv[n], above);
            AV_WN32A(&mb->bmv[n], left);

/**
 * The vp7 reference decoder uses a padding macroblock column (added to right
 * edge of the frame) to guard against illegal macroblock offsets. The
 * algorithm has bugs that permit offsets to straddle the padding column.
 * This function replicates those bugs.
 *
 * @param[out] edge_x macroblock x address
 * @param[out] edge_y macroblock y address
 *
 * @return macroblock offset legal (boolean)
 */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
    int vwidth = mb_width + 1;
    int new    = (mb_y + yoffset) * vwidth + mb_x + xoffset;
    if (new < boundary || new % vwidth == vwidth - 1)
    *edge_y = new / vwidth;
    *edge_x = new % vwidth;
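
/* Worked example of the arithmetic above: with mb_width = 4 (so vwidth = 5),
 * a predictor at mb_x = 3, mb_y = 2 with xoffset = 2, yoffset = 0 gives
 * new = 2 * 5 + 3 + 2 = 15. Since 15 % 5 == 0 is not the padding column
 * (vwidth - 1 == 4) and 15 >= boundary, the offset is accepted as
 * edge_y = 3, edge_x = 0; it has wrapped through the padding column onto the
 * start of the next row, which is the reference-decoder bug being
 * replicated. */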
static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
    return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];

static av_always_inline
void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
    VP8Macroblock *mb_edge[12];
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    uint8_t cnt[3] = { 0 };
    VP56RangeCoder *c = &s->c;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
        const VP7MVPred *pred = &vp7_mv_pred[i];

        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
            VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
                                               ? s->macroblocks_base + 1 + edge_x +
                                                 (s->mb_width + 1) * (edge_y + 1)
                                               : s->macroblocks + edge_x +
                                                 (s->mb_height - edge_y - 1) * 2;
            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
                if (AV_RN32A(&near_mv[CNT_NEAREST])) {
                    if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
                    } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
                        if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
                        AV_WN32A(&near_mv[CNT_NEAR], mv);
                    AV_WN32A(&near_mv[CNT_NEAREST], mv);

            cnt[idx] += vp7_mv_pred[i].score;

    mb->partitioning = VP8_SPLITMVMODE_NONE;

    if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR]    ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));

                if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
                    mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
            mb->mv = near_mv[CNT_NEAR];
            mb->bmv[0] = mb->mv;
        mb->mv = near_mv[CNT_NEAREST];
        mb->bmv[0] = mb->mv;
        mb->mode = VP8_MVMODE_ZERO;
        mb->bmv[0] = mb->mv;

static av_always_inline
void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
    VP8Macroblock *mb_edge[3] = { 0 /* top */,
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
        mb_edge[0] = mb - s->mb_width - 1;
        mb_edge[2] = mb - s->mb_width - 2;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
#define MV_EDGE_CHECK(n)                                    \
        VP8Macroblock *edge = mb_edge[n];                   \
        int edge_ref = edge->ref_frame;                     \
        if (edge_ref != VP56_FRAME_CURRENT) {               \
            uint32_t mv = AV_RN32A(&edge->mv);              \
            if (cur_sign_bias != sign_bias[edge_ref]) {     \
                /* SWAR negate of the values in mv. */      \
                mv = ~mv;                                   \
                mv = ((mv & 0x7fff7fff) +                   \
                      0x00010001) ^ (mv & 0x80008000);      \
            if (!n || mv != AV_RN32A(&near_mv[idx]))        \
                AV_WN32A(&near_mv[++idx], mv);              \
            cnt[idx] += 1 + (n != 2);                       \
            cnt[CNT_ZERO] += 1 + (n != 2);                  \
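
/* Worked example of the SWAR negate above: both 16-bit MV components packed
 * into one 32-bit word are negated at once as ~mv + 1 per lane. Masking off
 * the sign bits before the +1 keeps a carry in one component from spilling
 * into the other; the XOR then folds the sign bits back in. For a single
 * lane v = 0x0001: ~v = 0xFFFE, (0xFFFE & 0x7FFF) + 1 = 0x7FFF, and
 * 0x7FFF ^ (0xFFFE & 0x8000) = 0xFFFF, i.e. -1 as expected. */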
    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] &&
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
            FFSWAP(VP56mv,  near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode   == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode    == VP8_MVMODE_SPLIT)) * 2 +
                                   (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
                    mb->mv.y += vp8_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += vp8_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
            mb->bmv[0] = mb->mv;
        clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
        mb->bmv[0] = mb->mv;
        mb->mode = VP8_MVMODE_ZERO;
        mb->bmv[0] = mb->mv;

static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);

        uint8_t *const left = s->intra4x4_pred_mode_left;
            top = mb->intra4x4_pred_mode_top;
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y] = top[x] = *intra4x4;

        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
                                           vp8_pred4x4_prob_inter);

static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
    VP56RangeCoder *c = &s->c;
    const char *vp7_feature_name[] = { "q-index",
                                       "partial-golden-update",

    for (i = 0; i < 4; i++) {
        if (s->feature_enabled[i]) {
            if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
                int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
                                             s->feature_index_prob[i]);
                av_log(s->avctx, AV_LOG_WARNING,
                       "Feature %s present in macroblock (value 0x%x)\n",
                       vp7_feature_name[i], s->feature_value[i][index]);
    } else if (s->segmentation.update_map) {
        int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1 + bit]) + 2 * bit;
    } else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
                                    vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
            const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
                                           : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                vp8_pred8x8c_prob_intra);
        mb->ref_frame        = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
                                                               : VP56_FRAME_GOLDEN;
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame - 1]++;

        // motion vectors, 16.3
            vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
            vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
        mb->ref_frame        = VP56_FRAME_CURRENT;
        mb->partitioning     = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);

/**
 * @param r      arithmetic bitstream reader context
 * @param block  destination for block coefficients
 * @param probs  probabilities to use when reading trees from the bitstream
 * @param i      initial coeff index, 0 unless a separate DC block is coded
 * @param qmul   array holding the dc/ac dequant factor at position 0/1
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                 int i, uint8_t *token_prob, int16_t qmul[2],
                                 const uint8_t scan[16], int vp7)
    VP56RangeCoder c = *r;
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB

        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            token_prob = probs[i + 1][1];
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else { // DCT_CAT2
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                } else { // DCT_CAT3 and up
                    int a = vp56_rac_get_prob(&c, token_prob[8]);
                    int b = vp56_rac_get_prob(&c, token_prob[9 + a]);
                    int cat = (a << 1) + b;
                    coeff  = 3 + (8 << cat);
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
            token_prob = probs[i + 1][2];
        block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];

static av_always_inline
int inter_predict_dc(int16_t block[16], int16_t pred[2])
    int16_t dc = block[0];

    if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
        block[0] = pred[0] = dc;
        block[0] = pred[0] = dc;
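
/* Note on the condition above: it is a branchless "reset prediction" test.
 * ((int32_t)pred[0] ^ (int32_t)dc) >> 31 is nonzero iff the stored DC
 * prediction and the new DC differ in sign, and the bitwise ORs fold in the
 * "no prediction yet" (!pred[0]) and "zero DC" (!dc) cases without
 * short-circuit branches. */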
static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            const uint8_t scan[16])
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, scan, IS_VP7);

#ifndef vp8_decode_block_coeffs_internal
static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, zigzag_scan, IS_VP8);

/**
 * @param c          arithmetic bitstream reader context
 * @param block      destination for block coefficients
 * @param probs      probabilities to use when reading trees from the bitstream
 * @param i          initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul       array holding the dc/ac dequant factor at position 0/1
 * @param scan       scan pattern (VP7 only)
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
                        uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                        int i, int zero_nhood, int16_t qmul[2],
                        const uint8_t scan[16], int vp7)
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
    return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul, scan)
               : vp8_decode_block_coeffs_internal(c, block, probs, i,

static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
                      VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;

    if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
                                  nnz_pred, s->qmat[segment].luma_dc_qmul,
                                  zigzag_scan, is_vp7);
        l_nnz[8] = t_nnz[8] = !!nnz;

        if (is_vp7 && mb->mode > MODE_I4x4) {
            nnz |= inter_predict_dc(td->block_dc,
                                    s->inter_dc_pred[mb->ref_frame - 1]);

            s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
            s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);

    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x],
                                      s->prob->token[luma_ctx],
                                      luma_start, nnz_pred,
                                      s->qmat[segment].luma_qmul,
                                      s->prob[0].scan, is_vp7);
            /* nnz+block_dc may be one more than the actual last index,
             * but we don't care */
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;

    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
                nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
                                          s->prob->token[2], 0, nnz_pred,
                                          s->qmat[segment].chroma_qmul,
                                          s->prob[0].scan, is_vp7);
                td->non_zero_count_cache[i][(y << 1) + x] = nnz;
                t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.

static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
                      uint8_t *src_cb, uint8_t *src_cr,
                      int linesize, int uvlinesize, int simple)
    AV_COPY128(top_border, src_y + 15 * linesize);
        AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
        AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);

static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
                    uint8_t *src_cr, int linesize, int uvlinesize, int mb_x,
                    int mb_y, int mb_width, int simple, int xchg)
    uint8_t *top_border_m1 = top_border - 32; // for TL prediction
        src_cb -= uvlinesize;
        src_cr -= uvlinesize;

#define XCHG(a, b, xchg)                        \

    XCHG(top_border_m1 + 8, src_y - 8, xchg);
    XCHG(top_border, src_y, xchg);
    XCHG(top_border + 8, src_y + 8, 1);
    if (mb_x < mb_width - 1)
        XCHG(top_border + 32, src_y + 16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1 + 16, src_cb - 8, xchg);
        XCHG(top_border_m1 + 24, src_cr - 8, xchg);
        XCHG(top_border + 16, src_cb, 1);
        XCHG(top_border + 24, src_cr, 1);

static av_always_inline
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
    return mb_y ? mode : LEFT_DC_PRED8x8;

static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
        return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
    return mb_y ? mode : HOR_PRED8x8;

static av_always_inline
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
        return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
        return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
    case PLANE_PRED8x8: /* TM */
        return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);

static av_always_inline
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
        return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
    return mb_y ? mode : HOR_VP8_PRED;

static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
                                     int *copy_buf, int vp7)
    if (!mb_x && mb_y) {
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
        return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
        return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
    case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
                   * as 16x16/8x8 DC */
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
static av_always_inline
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
    int x, y, mode, nnz;

    /* for the first row, we need to run xchg_mb_border to init the top edge
     * to 127; otherwise, skip it if we aren't going to deblock */
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
        s->hpc.pred16x16[mode](dst[0], s->linesize);
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        const uint8_t lo = is_vp7 ? 128 : 127;
        const uint8_t hi = is_vp7 ? 128 : 129;
        uint8_t tr_top[4] = { lo, lo, lo, lo };

        // all blocks on the right edge of the macroblock use the bottom edge
        // of the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (mb_y && mb_x == s->mb_width - 1) {
            tr       = tr_right[-1] * 0x01010101u;
            tr_right = (uint8_t *) &tr;

        AV_ZERO128(td->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr + 4 * x;
                LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);

                if ((y == 0 || x == 3) && mb_y == 0) {
                    topright = tr_right;

                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
                                                        mb_y + y, &copy, is_vp7);
                    dst = copy_dst + 12;
                        AV_WN32A(copy_dst + 4, lo * 0x01010101U);
                        AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
                            copy_dst[3] = ptr[4 * x - s->linesize - 1];
                        copy_dst[11] = ptr[4 * x                   - 1];
                        copy_dst[19] = ptr[4 * x + s->linesize     - 1];
                        copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
                        copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
                s->hpc.pred4x4[mode](dst, topright, linesize);
                    AV_COPY32(ptr + 4 * x,                   copy_dst + 12);
                    AV_COPY32(ptr + 4 * x + s->linesize,     copy_dst + 20);
                    AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
                    AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);

                nnz = td->non_zero_count_cache[y][x];
                        s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
                                                  td->block[y][x], s->linesize);
                        s->vp8dsp.vp8_idct_add(ptr + 4 * x,
                                               td->block[y][x], s->linesize);

            ptr += 4 * s->linesize;

        mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
                                                mb_x, mb_y, is_vp7);
        s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
        s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);

static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
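
/* Worked example of the table above: mx indexes the eighth-pel phase (for
 * luma, (mv->x * 2) & 7; for chroma, mv->x & 7). Phase mx = 2 uses the wide
 * 6-tap filter and so needs 2 extra source pixels to the left and 3 to the
 * right: subpel_idx[1][2] = 5 extra columns in total, which is exactly the
 * amount the emulated-edge path below adds to block_w. Odd phases need only
 * 1 + 2 = 3 extra pixels, and phase 0 is a plain copy. */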
/**
 * @param s        VP8 decoding context
 * @param dst      target buffer for block data at block position
 * @param ref      reference picture buffer at origin (0, 0)
 * @param mv       motion vector (relative to block position) to get pixel data from
 * @param x_off    horizontal position of block from origin (0, 0)
 * @param y_off    vertical position of block from origin (0, 0)
 * @param block_w  width of block (16, 8 or 4)
 * @param block_h  height of block (always same as block_w)
 * @param width    width of src/dst plane data
 * @param height   height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
                 ThreadFrame *ref, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, ptrdiff_t linesize,
                 vp8_mc_func mc_func[3][3])
    uint8_t *src = ref->f->data[0];

        int src_linesize = linesize;

        int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx,
            src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            src_linesize = EDGE_EMU_LINESIZE;
        mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
                      linesize, block_h, 0, 0);

/**
 * chroma MC function
 *
 * @param s        VP8 decoding context
 * @param dst1     target buffer for block data at block position (U plane)
 * @param dst2     target buffer for block data at block position (V plane)
 * @param ref      reference picture buffer at origin (0, 0)
 * @param mv       motion vector (relative to block position) to get pixel data from
 * @param x_off    horizontal position of block from origin (0, 0)
 * @param y_off    vertical position of block from origin (0, 0)
 * @param block_w  width of block (16, 8 or 4)
 * @param block_h  height of block (always same as block_w)
 * @param width    width of src/dst plane data
 * @param height   height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
                   uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
                   int x_off, int y_off, int block_w, int block_h,
                   int width, int height, ptrdiff_t linesize,
                   vp8_mc_func mc_func[3][3])
    uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];

        int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
        int my = mv->y & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src1 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);

            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src2 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);

static av_always_inline
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                 ThreadFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off, int block_w, int block_h,
                 int width, int height, VP56mv *mv)
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
                ref_frame, mv, x_off + bx_off, y_off + by_off,
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);

    if (s->profile == 3) {
        /* this block only applies to VP8; it is safe to check
         * only the profile, as VP7 profile <= 1 */

    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);

/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
static av_always_inline
void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4;
        int mx = (mb->mv.x >> 2) + x_off + 8;
        int my = (mb->mv.y >> 2) + y_off;
        uint8_t **src = s->framep[ref]->tf.f->data;
        int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
        s->vdsp.prefetch(src[0] + off, s->linesize, 4);
        off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
        s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);

/**
 * Apply motion vectors to prediction buffer, chapter 18.
 */
static av_always_inline
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16 * s->mb_width, height = 16 * s->mb_height;
    ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
    case VP8_SPLITMVMODE_4x4: {

        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
                            ref, &bmv[4 * y + x],
                            4 * x + x_off, 4 * y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);

        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[2 * y       * 4 + 2 * x    ].x +
                         mb->bmv[2 * y       * 4 + 2 * x + 1].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
                uvmv.y = mb->bmv[2 * y       * 4 + 2 * x    ].y +
                         mb->bmv[2 * y       * 4 + 2 * x + 1].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
                uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
                uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
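                /* The two lines above average the four luma MVs of each 2x2
                 * chroma subblock with rounding away from zero: FF_SIGNBIT()
                 * contributes an extra -1 for negative sums, e.g. a sum of 6
                 * gives (6 + 2) >> 2 = 2 while -6 gives (-6 + 2 - 1) >> 2 = -2. */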
1929 if (s->profile == 3) {
1933 vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
1934 dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
1935 &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
1936 width, height, s->uvlinesize,
1937 s->put_pixels_tab[2]);
1942 case VP8_SPLITMVMODE_16x8:
1943 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1944 0, 0, 16, 8, width, height, &bmv[0]);
1945 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1946 0, 8, 16, 8, width, height, &bmv[1]);
1948 case VP8_SPLITMVMODE_8x16:
1949 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1950 0, 0, 8, 16, width, height, &bmv[0]);
1951 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1952 8, 0, 8, 16, width, height, &bmv[1]);
1954 case VP8_SPLITMVMODE_8x8:
1955 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1956 0, 0, 8, 8, width, height, &bmv[0]);
1957 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1958 8, 0, 8, 8, width, height, &bmv[1]);
1959 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1960 0, 8, 8, 8, width, height, &bmv[2]);
1961 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1962 8, 8, 8, 8, width, height, &bmv[3]);
1967 static av_always_inline
1968 void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
1972 if (mb->mode != MODE_I4x4) {
1973 uint8_t *y_dst = dst[0];
1974 for (y = 0; y < 4; y++) {
1975 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
1977 if (nnz4 & ~0x01010101) {
1978 for (x = 0; x < 4; x++) {
1979 if ((uint8_t) nnz4 == 1)
1980 s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
1983 else if ((uint8_t) nnz4 > 1)
1984 s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
1992 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1995 y_dst += 4 * s->linesize;
1999 for (ch = 0; ch < 2; ch++) {
2000 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
2002 uint8_t *ch_dst = dst[1 + ch];
2003 if (nnz4 & ~0x01010101) {
2004 for (y = 0; y < 2; y++) {
2005 for (x = 0; x < 2; x++) {
2006 if ((uint8_t) nnz4 == 1)
2007 s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
2008 td->block[4 + ch][(y << 1) + x],
2010 else if ((uint8_t) nnz4 > 1)
2011 s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
2012 td->block[4 + ch][(y << 1) + x],
2016 goto chroma_idct_end;
2018 ch_dst += 4 * s->uvlinesize;
2021 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
2029 static av_always_inline
2030 void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
2031 VP8FilterStrength *f, int is_vp7)
2033 int interior_limit, filter_level;
2035 if (s->segmentation.enabled) {
2036 filter_level = s->segmentation.filter_level[mb->segment];
2037 if (!s->segmentation.absolute_vals)
2038 filter_level += s->filter.level;
2040 filter_level = s->filter.level;
2042 if (s->lf_delta.enabled) {
2043 filter_level += s->lf_delta.ref[mb->ref_frame];
2044 filter_level += s->lf_delta.mode[mb->mode];
2047 filter_level = av_clip_uintp2(filter_level, 6);
2049 interior_limit = filter_level;
2050 if (s->filter.sharpness) {
2051 interior_limit >>= (s->filter.sharpness + 3) >> 2;
2052 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2054 interior_limit = FFMAX(interior_limit, 1);
2056 f->filter_level = filter_level;
2057 f->inner_limit = interior_limit;
2058 f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2059 mb->mode == VP8_MVMODE_SPLIT;
2062 static av_always_inline
2063 void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2064 int mb_x, int mb_y, int is_vp7)
2066 int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2067 int filter_level = f->filter_level;
2068 int inner_limit = f->inner_limit;
2069 int inner_filter = f->inner_filter;
2070 int linesize = s->linesize;
2071 int uvlinesize = s->uvlinesize;
2072 static const uint8_t hev_thresh_lut[2][64] = {
2073 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2074 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2075 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2077 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2078 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2079 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2087 bedge_lim_y = filter_level;
2088 bedge_lim_uv = filter_level * 2;
2089 mbedge_lim = filter_level + 2;
2092 bedge_lim_uv = filter_level * 2 + inner_limit;
2093 mbedge_lim = bedge_lim_y + 4;
2096 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
2099 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2100 mbedge_lim, inner_limit, hev_thresh);
2101 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2102 mbedge_lim, inner_limit, hev_thresh);
2105 #define H_LOOP_FILTER_16Y_INNER(cond) \
2106 if (cond && inner_filter) { \
2107 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2108 bedge_lim_y, inner_limit, \
2110 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2111 bedge_lim_y, inner_limit, \
2113 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2114 bedge_lim_y, inner_limit, \
2116 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2117 uvlinesize, bedge_lim_uv, \
2118 inner_limit, hev_thresh); \
2121 H_LOOP_FILTER_16Y_INNER(!is_vp7)
2124 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2125 mbedge_lim, inner_limit, hev_thresh);
2126 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2127 mbedge_lim, inner_limit, hev_thresh);
2131 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
2132 linesize, bedge_lim_y,
2133 inner_limit, hev_thresh);
2134 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
2135 linesize, bedge_lim_y,
2136 inner_limit, hev_thresh);
2137 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2138 linesize, bedge_lim_y,
2139 inner_limit, hev_thresh);
2140 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2141 dst[2] + 4 * uvlinesize,
2142 uvlinesize, bedge_lim_uv,
2143 inner_limit, hev_thresh);
2146 H_LOOP_FILTER_16Y_INNER(is_vp7)
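/* Simple loop filter: operates on luma only and has no high-edge-variance
 * logic.  The macroblock edge uses bedge_lim + 4; the three inner edges (at
 * 4, 8 and 12 pixels, horizontally and vertically) use the plain block-edge
 * limit. */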
2149 static av_always_inline
2150 void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2153 int mbedge_lim, bedge_lim;
2154 int filter_level = f->filter_level;
2155 int inner_limit = f->inner_limit;
2156 int inner_filter = f->inner_filter;
2157 int linesize = s->linesize;
2162 bedge_lim = 2 * filter_level + inner_limit;
2163 mbedge_lim = bedge_lim + 4;
2166 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2168 s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2169 s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2170 s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2174 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2176 s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2177 s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2178 s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
2182 #define MARGIN (16 << 2)
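/* MARGIN (16 << 2 == 1 << 6, one macroblock in the fixed-point units used
 * for mv_min/mv_max) is the slack allowed beyond the frame edges when
 * clamping motion vectors.
 *
 * vp78_decode_mv_mb_modes() decodes macroblock modes and motion vectors for
 * the entire frame in a single pass; it is used when mb_layout == 1 (see
 * vp78_decode_frame), otherwise modes are decoded per macroblock inside the
 * row decoder. */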
2183 static av_always_inline
2184 void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2185 VP8Frame *prev_frame, int is_vp7)
2187 VP8Context *s = avctx->priv_data;
2190 s->mv_min.y = -MARGIN;
2191 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2192 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
2193 VP8Macroblock *mb = s->macroblocks_base +
2194 ((s->mb_width + 1) * (mb_y + 1) + 1);
2195 int mb_xy = mb_y * s->mb_width;
2197 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2199 s->mv_min.x = -MARGIN;
2200 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2201 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2203 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2204 DC_PRED * 0x01010101);
2205 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2206 prev_frame && prev_frame->seg_map ?
2207 prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
2216 static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2217 VP8Frame *prev_frame)
2219 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
2222 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2223 VP8Frame *prev_frame)
2225 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
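/* Slice-threading synchronisation.  Each VP8ThreadData publishes its current
 * position packed as (mb_y << 16) | mb_x.  check_thread_pos() blocks on the
 * other thread's condition variable until that thread has advanced at least
 * to the requested position; update_pos() publishes a new position and wakes
 * any neighbouring thread waiting for a position we have now reached.  In
 * builds without threading support, the stub macros below reduce to no-ops. */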
2229 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
2231 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
2232 if (otd->thread_mb_pos < tmp) { \
2233 pthread_mutex_lock(&otd->lock); \
2234 td->wait_mb_pos = tmp; \
2236 if (otd->thread_mb_pos >= tmp) \
2238 pthread_cond_wait(&otd->cond, &otd->lock); \
2240 td->wait_mb_pos = INT_MAX; \
2241 pthread_mutex_unlock(&otd->lock); \
2245 #define update_pos(td, mb_y, mb_x) \
2247 int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
2248 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2250 int is_null = !next_td || !prev_td; \
2251 int pos_check = (is_null) ? 1 \
2252 : (next_td != td && \
2253 pos >= next_td->wait_mb_pos) || \
2255 pos >= prev_td->wait_mb_pos); \
2256 td->thread_mb_pos = pos; \
2257 if (sliced_threading && pos_check) { \
2258 pthread_mutex_lock(&td->lock); \
2259 pthread_cond_broadcast(&td->cond); \
2260 pthread_mutex_unlock(&td->lock); \
2264 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0)
2265 #define update_pos(td, mb_y, mb_x) while(0)
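/* Reconstruct one macroblock row without loop filtering: decode modes (when
 * they were not already decoded for the whole frame up front), decode
 * coefficients from the coefficient partition assigned to this row, perform
 * intra or inter prediction, apply the inverse transforms, and record the
 * per-macroblock filter strength for the later filtering pass.  Under slice
 * threading, check_thread_pos()/update_pos() keep this row suitably behind
 * the thread working on the row above and publish our own progress for the
 * threads behind us. */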
2268 static av_always_inline void decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2269 int jobnr, int threadnr, int is_vp7)
2271 VP8Context *s = avctx->priv_data;
2272 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
2273 int mb_y = td->thread_mb_pos >> 16;
2274 int mb_x, mb_xy = mb_y * s->mb_width;
2275 int num_jobs = s->num_jobs;
2276 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
2277 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
2280 curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2281 curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
2282 curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
2287 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2288 if (mb_y == s->mb_height - 1)
2291 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2292 if (s->mb_layout == 1)
2293 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2295 // If we re-use the previous frame's segmentation map, make sure that frame
2296 // has decoded this row (and thus written its map entries) before we read them.
2297 if (prev_frame && s->segmentation.enabled &&
2298 !s->segmentation.update_map)
2299 ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
2300 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2301 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2302 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2305 if (!is_vp7 || mb_y == 0)
2306 memset(td->left_nnz, 0, sizeof(td->left_nnz));
2308 s->mv_min.x = -MARGIN;
2309 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2311 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2312 // Wait until the thread handling the row above has decoded far enough: (mb_x+1, mb_y-1) for VP8, (mb_x+2, mb_y-2) for VP7.
2313 if (prev_td != td) {
2314 if (threadnr != 0) {
2315 check_thread_pos(td, prev_td,
2316 mb_x + (is_vp7 ? 2 : 1),
2317 mb_y - (is_vp7 ? 2 : 1));
2319 check_thread_pos(td, prev_td,
2320 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2321 mb_y - (is_vp7 ? 2 : 1));
2325 s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2327 s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2328 dst[2] - dst[1], 2);
2331 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2332 prev_frame && prev_frame->seg_map ?
2333 prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2335 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2338 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
2340 if (mb->mode <= MODE_I4x4)
2341 intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2343 inter_predict(s, td, dst, mb, mb_x, mb_y);
2345 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2348 idct_mb(s, td, dst, mb);
2350 AV_ZERO64(td->left_nnz);
2351 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2353 /* Reset the DC (WHT) block nnz predictors for modes where such a
2354  * block would exist if the mb had coefficients */
2355 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2356 td->left_nnz[8] = 0;
2357 s->top_nnz[mb_x][8] = 0;
2361 if (s->deblock_filter)
2362 filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
2364 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
2365 if (s->filter.simple)
2366 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2367 NULL, NULL, s->linesize, 0, 1);
2369 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2370 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2373 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
2381 if (mb_x == s->mb_width + 1) {
2382 update_pos(td, mb_y, s->mb_width + 3);
2384 update_pos(td, mb_y, mb_x);
2389 static void vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2390 int jobnr, int threadnr)
2392 decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
2395 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2396 int jobnr, int threadnr)
2398 decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
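/* Apply the loop filter to one already reconstructed macroblock row.  Before
 * filtering, backup_mb_border() saves the still-unfiltered bottom edge of
 * each macroblock into s->top_border for later use as the top border of the
 * row below (with multiple jobs this backup already happened during
 * reconstruction).  Either the simple or the normal filter is used, as
 * selected by the frame header. */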
2401 static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
2402 int jobnr, int threadnr, int is_vp7)
2404 VP8Context *s = avctx->priv_data;
2405 VP8ThreadData *td = &s->thread_data[threadnr];
2406 int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
2407 AVFrame *curframe = s->curframe->tf.f;
2409 VP8ThreadData *prev_td, *next_td;
2411 curframe->data[0] + 16 * mb_y * s->linesize,
2412 curframe->data[1] + 8 * mb_y * s->uvlinesize,
2413 curframe->data[2] + 8 * mb_y * s->uvlinesize
2416 if (s->mb_layout == 1)
2417 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2419 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2424 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2425 if (mb_y == s->mb_height - 1)
2428 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2430 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
2431 VP8FilterStrength *f = &td->filter_strength[mb_x];
2433 check_thread_pos(td, prev_td,
2434 (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
2436 if (next_td != &s->thread_data[0])
2437 check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
2439 if (num_jobs == 1) {
2440 if (s->filter.simple)
2441 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2442 NULL, NULL, s->linesize, 0, 1);
2444 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2445 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2448 if (s->filter.simple)
2449 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2451 filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
2456 update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
2460 static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
2461 int jobnr, int threadnr)
2463 filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
2466 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
2467 int jobnr, int threadnr)
2469 filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
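/* Per-job entry point for slice threading: rows are assigned round-robin, so
 * job n handles rows n, n + num_jobs, ...  Each row is reconstructed, then
 * filtered if deblocking is enabled, and frame-threading progress is reported
 * so that a following frame may start referencing the decoded rows. */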
2472 static av_always_inline
2473 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2474 int threadnr, int is_vp7)
2476 VP8Context *s = avctx->priv_data;
2477 VP8ThreadData *td = &s->thread_data[jobnr];
2478 VP8ThreadData *next_td = NULL, *prev_td = NULL;
2479 VP8Frame *curframe = s->curframe;
2480 int mb_y, num_jobs = s->num_jobs;
2482 td->thread_nr = threadnr;
2483 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
2484 if (mb_y >= s->mb_height)
2486 td->thread_mb_pos = mb_y << 16;
2487 s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
2488 if (s->deblock_filter)
2489 s->filter_mb_row(avctx, tdata, jobnr, threadnr);
2490 update_pos(td, mb_y, INT_MAX & 0xFFFF);
2495 if (avctx->active_thread_type == FF_THREAD_FRAME)
2496 ff_thread_report_progress(&curframe->tf, mb_y, 0);
2502 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2503 int jobnr, int threadnr)
2505 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
2508 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2509 int jobnr, int threadnr)
2511 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
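/* Shared VP7/VP8 frame decoding: parse the frame header, decide whether the
 * frame can be skipped (based on avctx->skip_frame and whether the frame is
 * referenced), rotate the previous/golden/altref/current reference slots,
 * run the row jobs through avctx->execute2(), and return the finished
 * picture to the caller unless the frame is marked invisible. */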
2515 static av_always_inline
2516 int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2517 AVPacket *avpkt, int is_vp7)
2519 VP8Context *s = avctx->priv_data;
2520 int ret, i, referenced, num_jobs;
2521 enum AVDiscard skip_thresh;
2522 VP8Frame *av_uninit(curframe), *prev_frame;
2525 ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2527 ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
2532 prev_frame = s->framep[VP56_FRAME_CURRENT];
2534 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2535 s->update_altref == VP56_FRAME_CURRENT;
2537 skip_thresh = !referenced ? AVDISCARD_NONREF
2538 : !s->keyframe ? AVDISCARD_NONKEY
2541 if (avctx->skip_frame >= skip_thresh) {
2543 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2546 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2548 // release no longer referenced frames
2549 for (i = 0; i < 5; i++)
2550 if (s->frames[i].tf.f->data[0] &&
2551 &s->frames[i] != prev_frame &&
2552 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2553 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
2554 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2555 vp8_release_frame(s, &s->frames[i]);
2557 curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
2560 avctx->colorspace = AVCOL_SPC_BT470BG;
2562 avctx->color_range = AVCOL_RANGE_JPEG;
2564 avctx->color_range = AVCOL_RANGE_MPEG;
2566 /* Given that arithmetic probabilities are updated every frame, it's quite
2567 * likely that the values we have on a random interframe are complete
2568  * junk if we didn't start decoding on a keyframe. So just don't display
2569 * anything rather than junk. */
2570 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2571 !s->framep[VP56_FRAME_GOLDEN] ||
2572 !s->framep[VP56_FRAME_GOLDEN2])) {
2573 av_log(avctx, AV_LOG_WARNING,
2574 "Discarding interframe without a prior keyframe!\n");
2575 ret = AVERROR_INVALIDDATA;
2579 curframe->tf.f->key_frame = s->keyframe;
2580 curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2581 : AV_PICTURE_TYPE_P;
2582 if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
2585 // check if golden and altref are swapped
2586 if (s->update_altref != VP56_FRAME_NONE)
2587 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2589 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2591 if (s->update_golden != VP56_FRAME_NONE)
2592 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2594 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
2597 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2599 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2601 s->next_framep[VP56_FRAME_CURRENT] = curframe;
2603 if (avctx->codec->update_thread_context)
2604 ff_thread_finish_setup(avctx);
2606 s->linesize = curframe->tf.f->linesize[0];
2607 s->uvlinesize = curframe->tf.f->linesize[1];
2609 memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2610 /* Zero macroblock structures for top/top-left prediction
2611 * from outside the frame. */
2613 memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2614 (s->mb_width + 1) * sizeof(*s->macroblocks));
2615 if (!s->mb_layout && s->keyframe)
2616 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2618 memset(s->ref_count, 0, sizeof(s->ref_count));
2620 if (s->mb_layout == 1) {
2621 // If we re-use the previous frame's segmentation map, wait until that frame
2622 // has written it (the map is filled before row decoding starts, so progress 1 suffices).
2623 if (prev_frame && s->segmentation.enabled &&
2624 !s->segmentation.update_map)
2625 ff_thread_await_progress(&prev_frame->tf, 1, 0);
2627 vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2629 vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2632 if (avctx->active_thread_type == FF_THREAD_FRAME)
2635 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2636 s->num_jobs = num_jobs;
2637 s->curframe = curframe;
2638 s->prev_frame = prev_frame;
2639 s->mv_min.y = -MARGIN;
2640 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2641 for (i = 0; i < MAX_THREADS; i++) {
2642 s->thread_data[i].thread_mb_pos = 0;
2643 s->thread_data[i].wait_mb_pos = INT_MAX;
2646 avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2649 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
2652 ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
2653 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2656 // if future frames don't use the updated probabilities,
2657 // reset them to the values we saved
2658 if (!s->update_probabilities)
2659 s->prob[0] = s->prob[1];
2661 if (!s->invisible) {
2662 if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
2669 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2673 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2676 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
2679 #if CONFIG_VP7_DECODER
2680 static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2683 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
2685 #endif /* CONFIG_VP7_DECODER */
2687 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2689 VP8Context *s = avctx->priv_data;
2692 vp8_decode_flush_impl(avctx, 1);
2693 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2694 av_frame_free(&s->frames[i].tf.f);
2699 static av_cold int vp8_init_frames(VP8Context *s)
2702 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2703 s->frames[i].tf.f = av_frame_alloc();
2704 if (!s->frames[i].tf.f)
2705 return AVERROR(ENOMEM);
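/* One-time decoder initialisation shared by VP7 and VP8: set up the video
 * and VP8 DSP contexts, pick the codec-specific prediction/DSP functions and
 * the matching row-decode and filter callbacks, and allocate the AVFrames
 * backing the internal frame slots. */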
2710 static av_always_inline
2711 int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2713 VP8Context *s = avctx->priv_data;
2717 s->vp7 = avctx->codec->id == AV_CODEC_ID_VP7;
2718 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2719 avctx->internal->allocate_progress = 1;
2721 ff_videodsp_init(&s->vdsp, 8);
2723 ff_vp78dsp_init(&s->vp8dsp);
2724 if (CONFIG_VP7_DECODER && is_vp7) {
2725 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2726 ff_vp7dsp_init(&s->vp8dsp);
2727 s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
2728 s->filter_mb_row = vp7_filter_mb_row;
2729 } else if (CONFIG_VP8_DECODER && !is_vp7) {
2730 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2731 ff_vp8dsp_init(&s->vp8dsp);
2732 s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
2733 s->filter_mb_row = vp8_filter_mb_row;
2736 /* does not change for VP8 */
2737 memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
2739 if ((ret = vp8_init_frames(s)) < 0) {
2740 ff_vp8_decode_free(avctx);
2747 #if CONFIG_VP7_DECODER
2748 static int vp7_decode_init(AVCodecContext *avctx)
2750 return vp78_decode_init(avctx, IS_VP7);
2752 #endif /* CONFIG_VP7_DECODER */
2754 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2756 return vp78_decode_init(avctx, IS_VP8);
2759 #if CONFIG_VP8_DECODER
2760 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2762 VP8Context *s = avctx->priv_data;
2767 if ((ret = vp8_init_frames(s)) < 0) {
2768 ff_vp8_decode_free(avctx);
2775 #define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
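/* Frame-threading context update.  REBASE() translates a frame pointer from
 * the source decoder's frames[] array into the corresponding entry of this
 * decoder's array.  The probability tables, segmentation state, loop-filter
 * deltas and sign biases are copied, the reference frames are re-referenced,
 * and the frame pointers are rebased into this context. */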
2777 static int vp8_decode_update_thread_context(AVCodecContext *dst,
2778 const AVCodecContext *src)
2780 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2783 if (s->macroblocks_base &&
2784 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2786 s->mb_width = s_src->mb_width;
2787 s->mb_height = s_src->mb_height;
2790 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2791 s->segmentation = s_src->segmentation;
2792 s->lf_delta = s_src->lf_delta;
2793 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2795 for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2796 if (s_src->frames[i].tf.f->data[0]) {
2797 int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
2803 s->framep[0] = REBASE(s_src->next_framep[0]);
2804 s->framep[1] = REBASE(s_src->next_framep[1]);
2805 s->framep[2] = REBASE(s_src->next_framep[2]);
2806 s->framep[3] = REBASE(s_src->next_framep[3]);
2810 #endif /* CONFIG_VP8_DECODER */
2812 #if CONFIG_VP7_DECODER
2813 AVCodec ff_vp7_decoder = {
2815 .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"),
2816 .type = AVMEDIA_TYPE_VIDEO,
2817 .id = AV_CODEC_ID_VP7,
2818 .priv_data_size = sizeof(VP8Context),
2819 .init = vp7_decode_init,
2820 .close = ff_vp8_decode_free,
2821 .decode = vp7_decode_frame,
2822 .capabilities = CODEC_CAP_DR1,
2823 .flush = vp8_decode_flush,
2825 #endif /* CONFIG_VP7_DECODER */
2827 #if CONFIG_VP8_DECODER
2828 AVCodec ff_vp8_decoder = {
2830 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2831 .type = AVMEDIA_TYPE_VIDEO,
2832 .id = AV_CODEC_ID_VP8,
2833 .priv_data_size = sizeof(VP8Context),
2834 .init = ff_vp8_decode_init,
2835 .close = ff_vp8_decode_free,
2836 .decode = ff_vp8_decode_frame,
2837 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2838 .flush = vp8_decode_flush,
2839 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2840 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2842 #endif /* CONFIG_VP8_DECODER */