2 * VP7/VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Fiona Glaser
7 * Copyright (C) 2012 Daniel Kang
8 * Copyright (C) 2014 Peter Ross
10 * This file is part of FFmpeg.
12 * FFmpeg is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "libavutil/imgutils.h"
31 #include "rectangle.h"
40 #if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
41 #define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
42 #elif CONFIG_VP7_DECODER
43 #define VPX(vp7, f) vp7_ ## f
44 #else // CONFIG_VP8_DECODER
45 #define VPX(vp7, f) vp8_ ## f
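/* Usage sketch (illustrative, not a specific call site): a call such as
 * VPX(is_vp7, decode_frame_header)(s, buf, buf_size) dispatches to the
 * vp7_ or vp8_ variant at run time when both decoders are built, and
 * collapses to the single compiled-in variant otherwise, so shared code
 * never references a missing symbol. */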
48 static void free_buffers(VP8Context *s)
52 for (i = 0; i < MAX_THREADS; i++) {
54 pthread_cond_destroy(&s->thread_data[i].cond);
55 pthread_mutex_destroy(&s->thread_data[i].lock);
57 av_freep(&s->thread_data[i].filter_strength);
59 av_freep(&s->thread_data);
60 av_freep(&s->macroblocks_base);
61 av_freep(&s->intra4x4_pred_mode_top);
62 av_freep(&s->top_nnz);
63 av_freep(&s->top_border);
65 s->macroblocks = NULL;
68 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
71 if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
72 ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
74 if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
75 ff_thread_release_buffer(s->avctx, &f->tf);
76 return AVERROR(ENOMEM);
81 static void vp8_release_frame(VP8Context *s, VP8Frame *f)
83 av_buffer_unref(&f->seg_map);
84 ff_thread_release_buffer(s->avctx, &f->tf);
87 #if CONFIG_VP8_DECODER
88 static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
92 vp8_release_frame(s, dst);
94 if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
97 !(dst->seg_map = av_buffer_ref(src->seg_map))) {
98 vp8_release_frame(s, dst);
99 return AVERROR(ENOMEM);
104 #endif /* CONFIG_VP8_DECODER */
106 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
108 VP8Context *s = avctx->priv_data;
111 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
112 vp8_release_frame(s, &s->frames[i]);
113 memset(s->framep, 0, sizeof(s->framep));
119 static void vp8_decode_flush(AVCodecContext *avctx)
121 vp8_decode_flush_impl(avctx, 0);
124 static VP8Frame *vp8_find_free_buffer(VP8Context *s)
126 VP8Frame *frame = NULL;
129 // find a free buffer
130 for (i = 0; i < 5; i++)
131 if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
132 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
133 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
134 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
135 frame = &s->frames[i];
139 av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
142 if (frame->tf.f->data[0])
143 vp8_release_frame(s, frame);
148 static av_always_inline
149 int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
151 AVCodecContext *avctx = s->avctx;
154 if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
155 height != s->avctx->height) {
156 vp8_decode_flush_impl(s->avctx, 1);
158 ret = ff_set_dimensions(s->avctx, width, height);
163 s->mb_width = (s->avctx->coded_width + 15) / 16;
164 s->mb_height = (s->avctx->coded_height + 15) / 16;
166 s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
167 FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1;
168 if (!s->mb_layout) { // Frame threading and one thread
169 s->macroblocks_base = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
170 sizeof(*s->macroblocks));
171 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
172 } else // Sliced threading
173 s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
174 sizeof(*s->macroblocks));
175 s->top_nnz = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
176 s->top_border = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
177 s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));
179 if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
180 !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
182 return AVERROR(ENOMEM);
185 for (i = 0; i < MAX_THREADS; i++) {
186 s->thread_data[i].filter_strength =
187 av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
188 if (!s->thread_data[i].filter_strength) {
190 return AVERROR(ENOMEM);
193 pthread_mutex_init(&s->thread_data[i].lock, NULL);
194 pthread_cond_init(&s->thread_data[i].cond, NULL);
198 s->macroblocks = s->macroblocks_base + 1;
203 static int vp7_update_dimensions(VP8Context *s, int width, int height)
205 return update_dimensions(s, width, height, IS_VP7);
208 static int vp8_update_dimensions(VP8Context *s, int width, int height)
210 return update_dimensions(s, width, height, IS_VP8);
214 static void parse_segment_info(VP8Context *s)
216 VP56RangeCoder *c = &s->c;
219 s->segmentation.update_map = vp8_rac_get(c);
221 if (vp8_rac_get(c)) { // update segment feature data
222 s->segmentation.absolute_vals = vp8_rac_get(c);
224 for (i = 0; i < 4; i++)
225 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
227 for (i = 0; i < 4; i++)
228 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
230 if (s->segmentation.update_map)
231 for (i = 0; i < 3; i++)
232 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
235 static void update_lf_deltas(VP8Context *s)
237 VP56RangeCoder *c = &s->c;
240 for (i = 0; i < 4; i++) {
241 if (vp8_rac_get(c)) {
242 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
245 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
249 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
250 if (vp8_rac_get(c)) {
251 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
254 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
259 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
261 const uint8_t *sizes = buf;
264 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
266 buf += 3 * (s->num_coeff_partitions - 1);
267 buf_size -= 3 * (s->num_coeff_partitions - 1);
271 for (i = 0; i < s->num_coeff_partitions - 1; i++) {
272 int size = AV_RL24(sizes + 3 * i);
273 if (buf_size - size < 0)
276 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
280 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
285 static void vp7_get_quants(VP8Context *s)
287 VP56RangeCoder *c = &s->c;
289 int yac_qi = vp8_rac_get_uint(c, 7);
290 int ydc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
291 int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
292 int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
293 int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
294 int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
296 s->qmat[0].luma_qmul[0] = vp7_ydc_qlookup[ydc_qi];
297 s->qmat[0].luma_qmul[1] = vp7_yac_qlookup[yac_qi];
298 s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
299 s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
300 s->qmat[0].chroma_qmul[0] = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
301 s->qmat[0].chroma_qmul[1] = vp7_yac_qlookup[uvac_qi];
304 static void vp8_get_quants(VP8Context *s)
306 VP56RangeCoder *c = &s->c;
309 int yac_qi = vp8_rac_get_uint(c, 7);
310 int ydc_delta = vp8_rac_get_sint(c, 4);
311 int y2dc_delta = vp8_rac_get_sint(c, 4);
312 int y2ac_delta = vp8_rac_get_sint(c, 4);
313 int uvdc_delta = vp8_rac_get_sint(c, 4);
314 int uvac_delta = vp8_rac_get_sint(c, 4);
316 for (i = 0; i < 4; i++) {
317 if (s->segmentation.enabled) {
318 base_qi = s->segmentation.base_quant[i];
319 if (!s->segmentation.absolute_vals)
324 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta, 7)];
325 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)];
326 s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)] * 2;
327 /* 101581>>16 is equivalent to 155/100 */
328 s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] * 101581 >> 16;
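/* 1.55 * 65536 = 101580.8, so multiplying by 101581 and shifting right by 16
 * applies the 155/100 scaling in 16.16 fixed point. */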
329 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
330 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
332 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
333 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
338 * Determine which buffers golden and altref should be updated with after this frame.
339 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
341 * Intra frames update all 3 references
342 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
343 * If the update (golden|altref) flag is set, it's updated with the current frame
344 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
345 * If the flag is not set, the number read means:
346 * 0: no update
347 * 1: VP56_FRAME_PREVIOUS
348 * 2: update golden with altref, or update altref with golden
350 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
352 VP56RangeCoder *c = &s->c;
355 return VP56_FRAME_CURRENT;
357 switch (vp8_rac_get_uint(c, 2)) {
359 return VP56_FRAME_PREVIOUS;
361 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
363 return VP56_FRAME_NONE;
366 static void vp78_reset_probability_tables(VP8Context *s)
369 for (i = 0; i < 4; i++)
370 for (j = 0; j < 16; j++)
371 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
372 sizeof(s->prob->token[i][j]));
375 static void vp78_update_probability_tables(VP8Context *s)
377 VP56RangeCoder *c = &s->c;
380 for (i = 0; i < 4; i++)
381 for (j = 0; j < 8; j++)
382 for (k = 0; k < 3; k++)
383 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
384 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
385 int prob = vp8_rac_get_uint(c, 8);
386 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
387 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
391 #define VP7_MVC_SIZE 17
392 #define VP8_MVC_SIZE 19
394 static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
397 VP56RangeCoder *c = &s->c;
401 for (i = 0; i < 4; i++)
402 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
404 for (i = 0; i < 3; i++)
405 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
407 // 17.2 MV probability update
408 for (i = 0; i < 2; i++)
409 for (j = 0; j < mvc_size; j++)
410 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
411 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
414 static void update_refs(VP8Context *s)
416 VP56RangeCoder *c = &s->c;
418 int update_golden = vp8_rac_get(c);
419 int update_altref = vp8_rac_get(c);
421 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
422 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
425 static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
429 for (j = 1; j < 3; j++) {
430 for (i = 0; i < height / 2; i++)
431 memcpy(dst->data[j] + i * dst->linesize[j],
432 src->data[j] + i * src->linesize[j], width / 2);
436 static void fade(uint8_t *dst, int dst_linesize,
437 const uint8_t *src, int src_linesize,
438 int width, int height,
442 for (j = 0; j < height; j++) {
443 for (i = 0; i < width; i++) {
444 uint8_t y = src[j * src_linesize + i];
445 dst[j * dst_linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
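/* i.e. dst ≈ clip(y * (1 + beta / 256) + alpha): beta acts as a signed
 * per-pixel gain and alpha as a signed offset. */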
450 static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
452 int alpha = (int8_t) vp8_rac_get_uint(c, 8);
453 int beta = (int8_t) vp8_rac_get_uint(c, 8);
456 if (!s->keyframe && (alpha || beta)) {
457 int width = s->mb_width * 16;
458 int height = s->mb_height * 16;
461 if (!s->framep[VP56_FRAME_PREVIOUS] ||
462 !s->framep[VP56_FRAME_GOLDEN]) {
463 av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
464 return AVERROR_INVALIDDATA;
468 src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
470 /* preserve the golden frame, write a new previous frame */
471 if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
472 s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
473 if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
476 dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
478 copy_chroma(dst, src, width, height);
481 fade(dst->data[0], dst->linesize[0],
482 src->data[0], src->linesize[0],
483 width, height, alpha, beta);
489 static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
491 VP56RangeCoder *c = &s->c;
492 int part1_size, hscale, vscale, i, j, ret;
493 int width = s->avctx->width;
494 int height = s->avctx->height;
496 s->profile = (buf[0] >> 1) & 7;
497 if (s->profile > 1) {
498 avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
499 return AVERROR_INVALIDDATA;
502 s->keyframe = !(buf[0] & 1);
504 part1_size = AV_RL24(buf) >> 4;
506 if (buf_size < 4 - s->profile + part1_size) {
507 av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed: %d\n", buf_size, 4 - s->profile + part1_size);
508 return AVERROR_INVALIDDATA;
511 buf += 4 - s->profile;
512 buf_size -= 4 - s->profile;
514 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
516 ff_vp56_init_range_decoder(c, buf, part1_size);
518 buf_size -= part1_size;
520 /* A. Dimension information (keyframes only) */
522 width = vp8_rac_get_uint(c, 12);
523 height = vp8_rac_get_uint(c, 12);
524 hscale = vp8_rac_get_uint(c, 2);
525 vscale = vp8_rac_get_uint(c, 2);
526 if (hscale || vscale)
527 avpriv_request_sample(s->avctx, "Upscaling");
529 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
530 vp78_reset_probability_tables(s);
531 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
532 sizeof(s->prob->pred16x16));
533 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
534 sizeof(s->prob->pred8x8c));
535 for (i = 0; i < 2; i++)
536 memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
537 sizeof(vp7_mv_default_prob[i]));
538 memset(&s->segmentation, 0, sizeof(s->segmentation));
539 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
540 memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
543 if (s->keyframe || s->profile > 0)
544 memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));
546 /* B. Decoding information for all four macroblock-level features */
547 for (i = 0; i < 4; i++) {
548 s->feature_enabled[i] = vp8_rac_get(c);
549 if (s->feature_enabled[i]) {
550 s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);
552 for (j = 0; j < 3; j++)
553 s->feature_index_prob[i][j] =
554 vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
556 if (vp7_feature_value_size[s->profile][i])
557 for (j = 0; j < 4; j++)
558 s->feature_value[i][j] =
559 vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
563 s->segmentation.enabled = 0;
564 s->segmentation.update_map = 0;
565 s->lf_delta.enabled = 0;
567 s->num_coeff_partitions = 1;
568 ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
570 if (!s->macroblocks_base || /* first frame */
571 width != s->avctx->width || height != s->avctx->height ||
572 (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
573 if ((ret = vp7_update_dimensions(s, width, height)) < 0)
577 /* C. Dequantization indices */
580 /* D. Golden frame update flag (a Flag) for interframes only */
582 s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
583 s->sign_bias[VP56_FRAME_GOLDEN] = 0;
587 s->update_probabilities = 1;
590 if (s->profile > 0) {
591 s->update_probabilities = vp8_rac_get(c);
592 if (!s->update_probabilities)
593 s->prob[1] = s->prob[0];
596 s->fade_present = vp8_rac_get(c);
599 /* E. Fading information for previous frame */
600 if (s->fade_present && vp8_rac_get(c)) {
601 if ((ret = vp7_fade_frame(s, c)) < 0)
605 /* F. Loop filter type */
607 s->filter.simple = vp8_rac_get(c);
609 /* G. DCT coefficient ordering specification */
611 for (i = 1; i < 16; i++)
612 s->prob[0].scan[i] = zigzag_scan[vp8_rac_get_uint(c, 4)];
614 /* H. Loop filter levels */
616 s->filter.simple = vp8_rac_get(c);
617 s->filter.level = vp8_rac_get_uint(c, 6);
618 s->filter.sharpness = vp8_rac_get_uint(c, 3);
620 /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
621 vp78_update_probability_tables(s);
623 s->mbskip_enabled = 0;
625 /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
627 s->prob->intra = vp8_rac_get_uint(c, 8);
628 s->prob->last = vp8_rac_get_uint(c, 8);
629 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
635 static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
637 VP56RangeCoder *c = &s->c;
638 int header_size, hscale, vscale, ret;
639 int width = s->avctx->width;
640 int height = s->avctx->height;
643 av_log(s->avctx, AV_LOG_ERROR, "Insufficient data (%d) for header\n", buf_size);
644 return AVERROR_INVALIDDATA;
647 s->keyframe = !(buf[0] & 1);
648 s->profile = (buf[0]>>1) & 7;
649 s->invisible = !(buf[0] & 0x10);
650 header_size = AV_RL24(buf) >> 5;
655 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
658 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
659 sizeof(s->put_pixels_tab));
660 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
661 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
662 sizeof(s->put_pixels_tab));
664 if (header_size > buf_size - 7 * s->keyframe) {
665 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
666 return AVERROR_INVALIDDATA;
670 if (AV_RL24(buf) != 0x2a019d) {
671 av_log(s->avctx, AV_LOG_ERROR,
672 "Invalid start code 0x%x\n", AV_RL24(buf));
673 return AVERROR_INVALIDDATA;
675 width = AV_RL16(buf + 3) & 0x3fff;
676 height = AV_RL16(buf + 5) & 0x3fff;
677 hscale = buf[4] >> 6;
678 vscale = buf[6] >> 6;
682 if (hscale || vscale)
683 avpriv_request_sample(s->avctx, "Upscaling");
685 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
686 vp78_reset_probability_tables(s);
687 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
688 sizeof(s->prob->pred16x16));
689 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
690 sizeof(s->prob->pred8x8c));
691 memcpy(s->prob->mvc, vp8_mv_default_prob,
692 sizeof(s->prob->mvc));
693 memset(&s->segmentation, 0, sizeof(s->segmentation));
694 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
697 ff_vp56_init_range_decoder(c, buf, header_size);
699 buf_size -= header_size;
702 s->colorspace = vp8_rac_get(c);
704 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
705 s->fullrange = vp8_rac_get(c);
708 if ((s->segmentation.enabled = vp8_rac_get(c)))
709 parse_segment_info(s);
711 s->segmentation.update_map = 0; // FIXME: move this to some init function?
713 s->filter.simple = vp8_rac_get(c);
714 s->filter.level = vp8_rac_get_uint(c, 6);
715 s->filter.sharpness = vp8_rac_get_uint(c, 3);
717 if ((s->lf_delta.enabled = vp8_rac_get(c)))
721 if (setup_partitions(s, buf, buf_size)) {
722 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
723 return AVERROR_INVALIDDATA;
726 if (!s->macroblocks_base || /* first frame */
727 width != s->avctx->width || height != s->avctx->height ||
728 (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
729 if ((ret = vp8_update_dimensions(s, width, height)) < 0)
736 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
737 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
740 // if we aren't saving this frame's probabilities for future frames,
741 // make a copy of the current probabilities
742 if (!(s->update_probabilities = vp8_rac_get(c)))
743 s->prob[1] = s->prob[0];
745 s->update_last = s->keyframe || vp8_rac_get(c);
747 vp78_update_probability_tables(s);
749 if ((s->mbskip_enabled = vp8_rac_get(c)))
750 s->prob->mbskip = vp8_rac_get_uint(c, 8);
753 s->prob->intra = vp8_rac_get_uint(c, 8);
754 s->prob->last = vp8_rac_get_uint(c, 8);
755 s->prob->golden = vp8_rac_get_uint(c, 8);
756 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
762 static av_always_inline
763 void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
765 dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
766 av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
767 dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
768 av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));
772 * Motion vector coding, 17.1.
774 static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
778 if (vp56_rac_get_prob_branchy(c, p[0])) {
781 for (i = 0; i < 3; i++)
782 x += vp56_rac_get_prob(c, p[9 + i]) << i;
783 for (i = (vp7 ? 7 : 9); i > 3; i--)
784 x += vp56_rac_get_prob(c, p[9 + i]) << i;
785 if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
789 const uint8_t *ps = p + 2;
790 bit = vp56_rac_get_prob(c, *ps);
793 bit = vp56_rac_get_prob(c, *ps);
796 x += vp56_rac_get_prob(c, *ps);
799 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
802 static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
804 return read_mv_component(c, p, 1);
807 static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
809 return read_mv_component(c, p, 0);
812 static av_always_inline
813 const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
816 return vp7_submv_prob;
819 return vp8_submv_prob[4 - !!left];
821 return vp8_submv_prob[2];
822 return vp8_submv_prob[1 - !!left];
826 * Split motion vector prediction, 16.4.
827 * @returns the number of motion vectors parsed (2, 4 or 16)
829 static av_always_inline
830 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
831 int layout, int is_vp7)
835 VP8Macroblock *top_mb;
836 VP8Macroblock *left_mb = &mb[-1];
837 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
838 const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
840 VP56mv *left_mv = left_mb->bmv;
841 VP56mv *cur_mv = mb->bmv;
843 if (!layout) // layout is inlined, s->mb_layout is not
846 top_mb = &mb[-s->mb_width - 1];
847 mbsplits_top = vp8_mbsplits[top_mb->partitioning];
848 top_mv = top_mb->bmv;
850 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
851 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
852 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
854 part_idx = VP8_SPLITMVMODE_8x8;
856 part_idx = VP8_SPLITMVMODE_4x4;
859 num = vp8_mbsplit_count[part_idx];
860 mbsplits_cur = vp8_mbsplits[part_idx],
861 firstidx = vp8_mbfirstidx[part_idx];
862 mb->partitioning = part_idx;
864 for (n = 0; n < num; n++) {
866 uint32_t left, above;
867 const uint8_t *submv_prob;
870 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
872 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
874 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
876 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
878 submv_prob = get_submv_prob(left, above, is_vp7);
880 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
881 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
882 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
883 mb->bmv[n].y = mb->mv.y +
884 read_mv_component(c, s->prob->mvc[0], is_vp7);
885 mb->bmv[n].x = mb->mv.x +
886 read_mv_component(c, s->prob->mvc[1], is_vp7);
888 AV_ZERO32(&mb->bmv[n]);
891 AV_WN32A(&mb->bmv[n], above);
894 AV_WN32A(&mb->bmv[n], left);
902 * The vp7 reference decoder uses a padding macroblock column (added to the
903 * right edge of the frame) to guard against illegal macroblock offsets. The
904 * algorithm has bugs that permit offsets to straddle the padding column.
905 * This function replicates those bugs.
907 * @param[out] edge_x macroblock x address
908 * @param[out] edge_y macroblock y address
910 * @return whether the macroblock offset is legal (boolean)
912 static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
913 int xoffset, int yoffset, int boundary,
914 int *edge_x, int *edge_y)
916 int vwidth = mb_width + 1;
917 int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
918 if (new < boundary || new % vwidth == vwidth - 1)
920 *edge_y = new / vwidth;
921 *edge_x = new % vwidth;
925 static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
927 return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
930 static av_always_inline
931 void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
932 int mb_x, int mb_y, int layout)
934 VP8Macroblock *mb_edge[12];
935 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
936 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
939 uint8_t cnt[3] = { 0 };
940 VP56RangeCoder *c = &s->c;
943 AV_ZERO32(&near_mv[0]);
944 AV_ZERO32(&near_mv[1]);
945 AV_ZERO32(&near_mv[2]);
947 for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
948 const VP7MVPred * pred = &vp7_mv_pred[i];
951 if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
952 pred->yoffset, !s->profile, &edge_x, &edge_y)) {
953 VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
954 ? s->macroblocks_base + 1 + edge_x +
955 (s->mb_width + 1) * (edge_y + 1)
956 : s->macroblocks + edge_x +
957 (s->mb_height - edge_y - 1) * 2;
958 uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
960 if (AV_RN32A(&near_mv[CNT_NEAREST])) {
961 if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
963 } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
964 if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
968 AV_WN32A(&near_mv[CNT_NEAR], mv);
972 AV_WN32A(&near_mv[CNT_NEAREST], mv);
981 cnt[idx] += vp7_mv_pred[i].score;
984 mb->partitioning = VP8_SPLITMVMODE_NONE;
986 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
987 mb->mode = VP8_MVMODE_MV;
989 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
991 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
993 if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
994 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
996 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR] ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
998 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
999 mb->mode = VP8_MVMODE_SPLIT;
1000 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
1002 mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
1003 mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
1004 mb->bmv[0] = mb->mv;
1007 mb->mv = near_mv[CNT_NEAR];
1008 mb->bmv[0] = mb->mv;
1011 mb->mv = near_mv[CNT_NEAREST];
1012 mb->bmv[0] = mb->mv;
1015 mb->mode = VP8_MVMODE_ZERO;
1017 mb->bmv[0] = mb->mv;
1021 static av_always_inline
1022 void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
1023 int mb_x, int mb_y, int layout)
1025 VP8Macroblock *mb_edge[3] = { 0 /* top */,
1028 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
1029 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1031 int cur_sign_bias = s->sign_bias[mb->ref_frame];
1032 int8_t *sign_bias = s->sign_bias;
1034 uint8_t cnt[4] = { 0 };
1035 VP56RangeCoder *c = &s->c;
1037 if (!layout) { // layout is inlined (s->mb_layout is not)
1038 mb_edge[0] = mb + 2;
1039 mb_edge[2] = mb + 1;
1041 mb_edge[0] = mb - s->mb_width - 1;
1042 mb_edge[2] = mb - s->mb_width - 2;
1045 AV_ZERO32(&near_mv[0]);
1046 AV_ZERO32(&near_mv[1]);
1047 AV_ZERO32(&near_mv[2]);
1049 /* Process MB on top, left and top-left */
1050 #define MV_EDGE_CHECK(n) \
1052 VP8Macroblock *edge = mb_edge[n]; \
1053 int edge_ref = edge->ref_frame; \
1054 if (edge_ref != VP56_FRAME_CURRENT) { \
1055 uint32_t mv = AV_RN32A(&edge->mv); \
1057 if (cur_sign_bias != sign_bias[edge_ref]) { \
1058 /* SWAR negate of the values in mv. */ \
1060 mv = ((mv & 0x7fff7fff) + \
1061 0x00010001) ^ (mv & 0x80008000); \
1063 if (!n || mv != AV_RN32A(&near_mv[idx])) \
1064 AV_WN32A(&near_mv[++idx], mv); \
1065 cnt[idx] += 1 + (n != 2); \
1067 cnt[CNT_ZERO] += 1 + (n != 2); \
1075 mb->partitioning = VP8_SPLITMVMODE_NONE;
1076 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
1077 mb->mode = VP8_MVMODE_MV;
1079 /* If we have three distinct MVs, merge first and last if they're the same */
1080 if (cnt[CNT_SPLITMV] &&
1081 AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
1082 cnt[CNT_NEAREST] += 1;
1084 /* Swap near and nearest if necessary */
1085 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
1086 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
1087 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
1090 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
1091 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
1092 /* Choose the best mv out of 0,0 and the nearest mv */
1093 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
1094 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
1095 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
1096 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
1098 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
1099 mb->mode = VP8_MVMODE_SPLIT;
1100 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
1102 mb->mv.y += vp8_read_mv_component(c, s->prob->mvc[0]);
1103 mb->mv.x += vp8_read_mv_component(c, s->prob->mvc[1]);
1104 mb->bmv[0] = mb->mv;
1107 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
1108 mb->bmv[0] = mb->mv;
1111 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
1112 mb->bmv[0] = mb->mv;
1115 mb->mode = VP8_MVMODE_ZERO;
1117 mb->bmv[0] = mb->mv;
1121 static av_always_inline
1122 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
1123 int mb_x, int keyframe, int layout)
1125 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1128 VP8Macroblock *mb_top = mb - s->mb_width - 1;
1129 memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
1134 uint8_t *const left = s->intra4x4_pred_mode_left;
1136 top = mb->intra4x4_pred_mode_top;
1138 top = s->intra4x4_pred_mode_top + 4 * mb_x;
1139 for (y = 0; y < 4; y++) {
1140 for (x = 0; x < 4; x++) {
1142 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
1143 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
1144 left[y] = top[x] = *intra4x4;
1150 for (i = 0; i < 16; i++)
1151 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
1152 vp8_pred4x4_prob_inter);
1156 static av_always_inline
1157 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1158 uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
1160 VP56RangeCoder *c = &s->c;
1161 const char *vp7_feature_name[] = { "q-index",
1163 "partial-golden-update",
1168 for (i = 0; i < 4; i++) {
1169 if (s->feature_enabled[i]) {
1170 if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
1171 int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
1172 s->feature_index_prob[i]);
1173 av_log(s->avctx, AV_LOG_WARNING,
1174 "Feature %s present in macroblock (value 0x%x)\n",
1175 vp7_feature_name[i], s->feature_value[i][index]);
1179 } else if (s->segmentation.update_map) {
1180 int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
1181 *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
1182 } else if (s->segmentation.enabled)
1183 *segment = ref ? *ref : *segment;
1184 mb->segment = *segment;
1186 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
1189 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
1190 vp8_pred16x16_prob_intra);
1192 if (mb->mode == MODE_I4x4) {
1193 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
1195 const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
1196 : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
1198 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
1200 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
1201 AV_WN32A(s->intra4x4_pred_mode_left, modes);
1204 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1205 vp8_pred8x8c_prob_intra);
1206 mb->ref_frame = VP56_FRAME_CURRENT;
1207 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
1209 if (vp56_rac_get_prob_branchy(c, s->prob->last))
1211 (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
1212 : VP56_FRAME_GOLDEN;
1214 mb->ref_frame = VP56_FRAME_PREVIOUS;
1215 s->ref_count[mb->ref_frame - 1]++;
1217 // motion vectors, 16.3
1219 vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
1221 vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
1224 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
1226 if (mb->mode == MODE_I4x4)
1227 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
1229 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1231 mb->ref_frame = VP56_FRAME_CURRENT;
1232 mb->partitioning = VP8_SPLITMVMODE_NONE;
1233 AV_ZERO32(&mb->bmv[0]);
1238 * @param r arithmetic bitstream reader context
1239 * @param block destination for block coefficients
1240 * @param probs probabilities to use when reading trees from the bitstream
1241 * @param i initial coeff index, 0 unless a separate DC block is coded
1242 * @param qmul array holding the dc/ac dequant factor at position 0/1
1244 * @return 0 if no coeffs were decoded
1245 * otherwise, the index of the last coeff decoded plus one
1247 static av_always_inline
1248 int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
1249 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1250 int i, uint8_t *token_prob, int16_t qmul[2],
1251 const uint8_t scan[16], int vp7)
1253 VP56RangeCoder c = *r;
1258 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
1262 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
1264 break; // invalid input; blocks should end with EOB
1265 token_prob = probs[i][0];
1271 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
1273 token_prob = probs[i + 1][1];
1275 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
1276 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
1278 coeff += vp56_rac_get_prob(&c, token_prob[5]);
1282 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
1283 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
1284 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
1285 } else { // DCT_CAT2
1287 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
1288 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
1290 } else { // DCT_CAT3 and up
1291 int a = vp56_rac_get_prob(&c, token_prob[8]);
1292 int b = vp56_rac_get_prob(&c, token_prob[9 + a]);
1293 int cat = (a << 1) + b;
1294 coeff = 3 + (8 << cat);
1295 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
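/* cat 0..3 here correspond to DCT_CAT3..DCT_CAT6, whose coefficient
 * ranges start at 3 + (8 << cat) = 11, 19, 35 and 67 respectively. */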
1298 token_prob = probs[i + 1][2];
1300 block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
1307 static av_always_inline
1308 int inter_predict_dc(int16_t block[16], int16_t pred[2])
1310 int16_t dc = block[0];
1318 if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1319 block[0] = pred[0] = dc;
1324 block[0] = pred[0] = dc;
1330 static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
1332 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1333 int i, uint8_t *token_prob,
1335 const uint8_t scan[16])
1337 return decode_block_coeffs_internal(r, block, probs, i,
1338 token_prob, qmul, scan, IS_VP7);
1341 #ifndef vp8_decode_block_coeffs_internal
1342 static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
1344 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1345 int i, uint8_t *token_prob,
1348 return decode_block_coeffs_internal(r, block, probs, i,
1349 token_prob, qmul, zigzag_scan, IS_VP8);
1354 * @param c arithmetic bitstream reader context
1355 * @param block destination for block coefficients
1356 * @param probs probabilities to use when reading trees from the bitstream
1357 * @param i initial coeff index, 0 unless a separate DC block is coded
1358 * @param zero_nhood the initial prediction context for number of surrounding
1359 * all-zero blocks (only left/top, so 0-2)
1360 * @param qmul array holding the dc/ac dequant factor at position 0/1
1361 * @param scan scan pattern (VP7 only)
1363 * @return 0 if no coeffs were decoded
1364 * otherwise, the index of the last coeff decoded plus one
1366 static av_always_inline
1367 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
1368 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1369 int i, int zero_nhood, int16_t qmul[2],
1370 const uint8_t scan[16], int vp7)
1372 uint8_t *token_prob = probs[i][zero_nhood];
1373 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
1375 return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1376 token_prob, qmul, scan)
1377 : vp8_decode_block_coeffs_internal(c, block, probs, i,
1381 static av_always_inline
1382 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
1383 VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
1386 int i, x, y, luma_start = 0, luma_ctx = 3;
1387 int nnz_pred, nnz, nnz_total = 0;
1388 int segment = mb->segment;
1391 if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1392 nnz_pred = t_nnz[8] + l_nnz[8];
1394 // decode DC values and do hadamard
1395 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
1396 nnz_pred, s->qmat[segment].luma_dc_qmul,
1397 zigzag_scan, is_vp7);
1398 l_nnz[8] = t_nnz[8] = !!nnz;
1400 if (is_vp7 && mb->mode > MODE_I4x4) {
1401 nnz |= inter_predict_dc(td->block_dc,
1402 s->inter_dc_pred[mb->ref_frame - 1]);
1409 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1411 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
1418 for (y = 0; y < 4; y++)
1419 for (x = 0; x < 4; x++) {
1420 nnz_pred = l_nnz[y] + t_nnz[x];
1421 nnz = decode_block_coeffs(c, td->block[y][x],
1422 s->prob->token[luma_ctx],
1423 luma_start, nnz_pred,
1424 s->qmat[segment].luma_qmul,
1425 s->prob[0].scan, is_vp7);
1426 /* nnz+block_dc may be one more than the actual last index,
1427 * but we don't care */
1428 td->non_zero_count_cache[y][x] = nnz + block_dc;
1429 t_nnz[x] = l_nnz[y] = !!nnz;
1434 // TODO: what to do about dimensions? 2nd dim for luma is x,
1435 // but for chroma it's (y<<1)|x
1436 for (i = 4; i < 6; i++)
1437 for (y = 0; y < 2; y++)
1438 for (x = 0; x < 2; x++) {
1439 nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
1440 nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
1441 s->prob->token[2], 0, nnz_pred,
1442 s->qmat[segment].chroma_qmul,
1443 s->prob[0].scan, is_vp7);
1444 td->non_zero_count_cache[i][(y << 1) + x] = nnz;
1445 t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
1449 // if there were no coded coeffs despite the macroblock not being marked skip,
1450 // we MUST not do the inner loop filter and should not do IDCT
1451 // Since skip isn't used for bitstream prediction, just manually set it.
1456 static av_always_inline
1457 void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1458 uint8_t *src_cb, uint8_t *src_cr,
1459 int linesize, int uvlinesize, int simple)
1461 AV_COPY128(top_border, src_y + 15 * linesize);
1463 AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1464 AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
1468 static av_always_inline
1469 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
1470 uint8_t *src_cr, int linesize, int uvlinesize, int mb_x,
1471 int mb_y, int mb_width, int simple, int xchg)
1473 uint8_t *top_border_m1 = top_border - 32; // for TL prediction
1475 src_cb -= uvlinesize;
1476 src_cr -= uvlinesize;
1478 #define XCHG(a, b, xchg) \
1486 XCHG(top_border_m1 + 8, src_y - 8, xchg);
1487 XCHG(top_border, src_y, xchg);
1488 XCHG(top_border + 8, src_y + 8, 1);
1489 if (mb_x < mb_width - 1)
1490 XCHG(top_border + 32, src_y + 16, 1);
1492 // only copy chroma for normal loop filter
1493 // or to initialize the top row to 127
1494 if (!simple || !mb_y) {
1495 XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1496 XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1497 XCHG(top_border + 16, src_cb, 1);
1498 XCHG(top_border + 24, src_cr, 1);
1502 static av_always_inline
1503 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1506 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1508 return mb_y ? mode : LEFT_DC_PRED8x8;
1511 static av_always_inline
1512 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1515 return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1517 return mb_y ? mode : HOR_PRED8x8;
1520 static av_always_inline
1521 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1525 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1527 return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1529 return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1530 case PLANE_PRED8x8: /* TM */
1531 return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
1536 static av_always_inline
1537 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1540 return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1542 return mb_y ? mode : HOR_VP8_PRED;
1546 static av_always_inline
1547 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1548 int *copy_buf, int vp7)
1552 if (!mb_x && mb_y) {
1557 case DIAG_DOWN_LEFT_PRED:
1558 case VERT_LEFT_PRED:
1559 return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1567 return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1569 return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1570 case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1571 * as 16x16/8x8 DC */
1572 case DIAG_DOWN_RIGHT_PRED:
1573 case VERT_RIGHT_PRED:
1582 static av_always_inline
1583 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1584 VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
1586 int x, y, mode, nnz;
1589 /* for the first row, we need to run xchg_mb_border to init the top edge
1590 * to 127; otherwise, skip it if we aren't going to deblock */
1591 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1592 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1593 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1594 s->filter.simple, 1);
1596 if (mb->mode < MODE_I4x4) {
1597 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
1598 s->hpc.pred16x16[mode](dst[0], s->linesize);
1600 uint8_t *ptr = dst[0];
1601 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1602 const uint8_t lo = is_vp7 ? 128 : 127;
1603 const uint8_t hi = is_vp7 ? 128 : 129;
1604 uint8_t tr_top[4] = { lo, lo, lo, lo };
1606 // all blocks on the right edge of the macroblock use the bottom edge of
1607 // the top macroblock for their topright edge
1608 uint8_t *tr_right = ptr - s->linesize + 16;
1610 // if we're on the right edge of the frame, said edge is extended
1611 // from the top macroblock
1612 if (mb_y && mb_x == s->mb_width - 1) {
1613 tr = tr_right[-1] * 0x01010101u;
1614 tr_right = (uint8_t *) &tr;
1618 AV_ZERO128(td->non_zero_count_cache);
1620 for (y = 0; y < 4; y++) {
1621 uint8_t *topright = ptr + 4 - s->linesize;
1622 for (x = 0; x < 4; x++) {
1623 int copy = 0, linesize = s->linesize;
1624 uint8_t *dst = ptr + 4 * x;
1625 LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);
1627 if ((y == 0 || x == 3) && mb_y == 0) {
1630 topright = tr_right;
1632 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
1633 mb_y + y, &copy, is_vp7);
1635 dst = copy_dst + 12;
1639 AV_WN32A(copy_dst + 4, lo * 0x01010101U);
1641 AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
1645 copy_dst[3] = ptr[4 * x - s->linesize - 1];
1654 copy_dst[11] = ptr[4 * x - 1];
1655 copy_dst[19] = ptr[4 * x + s->linesize - 1];
1656 copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
1657 copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
1660 s->hpc.pred4x4[mode](dst, topright, linesize);
1662 AV_COPY32(ptr + 4 * x, copy_dst + 12);
1663 AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20);
1664 AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
1665 AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
1668 nnz = td->non_zero_count_cache[y][x];
1671 s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
1672 td->block[y][x], s->linesize);
1674 s->vp8dsp.vp8_idct_add(ptr + 4 * x,
1675 td->block[y][x], s->linesize);
1680 ptr += 4 * s->linesize;
1685 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
1686 mb_x, mb_y, is_vp7);
1687 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1688 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1690 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1691 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1692 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1693 s->filter.simple, 0);
1696 static const uint8_t subpel_idx[3][8] = {
1697 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1698 // also function pointer index
1699 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1700 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
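/* In the VP8 six-tap filter set the odd 1/8-pel positions have zero outer
 * taps (effectively 4-tap), so they need only 1 left / 2 right border
 * pixels; the even, non-zero positions use all six taps and need 2 / 3. */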
1706 * @param s VP8 decoding context
1707 * @param dst target buffer for block data at block position
1708 * @param ref reference picture buffer at origin (0, 0)
1709 * @param mv motion vector (relative to block position) to get pixel data from
1710 * @param x_off horizontal position of block from origin (0, 0)
1711 * @param y_off vertical position of block from origin (0, 0)
1712 * @param block_w width of block (16, 8 or 4)
1713 * @param block_h height of block (always same as block_w)
1714 * @param width width of src/dst plane data
1715 * @param height height of src/dst plane data
1716 * @param linesize size of a single line of plane data, including padding
1717 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1719 static av_always_inline
1720 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1721 ThreadFrame *ref, const VP56mv *mv,
1722 int x_off, int y_off, int block_w, int block_h,
1723 int width, int height, ptrdiff_t linesize,
1724 vp8_mc_func mc_func[3][3])
1726 uint8_t *src = ref->f->data[0];
1729 int src_linesize = linesize;
1731 int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
1732 int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];
1734 x_off += mv->x >> 2;
1735 y_off += mv->y >> 2;
1738 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1739 src += y_off * linesize + x_off;
1740 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1741 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1742 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1743 src - my_idx * linesize - mx_idx,
1744 EDGE_EMU_LINESIZE, linesize,
1745 block_w + subpel_idx[1][mx],
1746 block_h + subpel_idx[1][my],
1747 x_off - mx_idx, y_off - my_idx,
1749 src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1750 src_linesize = EDGE_EMU_LINESIZE;
1752 mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
1754 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1755 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1756 linesize, block_h, 0, 0);
1761 * chroma MC function
1763 * @param s VP8 decoding context
1764 * @param dst1 target buffer for block data at block position (U plane)
1765 * @param dst2 target buffer for block data at block position (V plane)
1766 * @param ref reference picture buffer at origin (0, 0)
1767 * @param mv motion vector (relative to block position) to get pixel data from
1768 * @param x_off horizontal position of block from origin (0, 0)
1769 * @param y_off vertical position of block from origin (0, 0)
1770 * @param block_w width of block (16, 8 or 4)
1771 * @param block_h height of block (always same as block_w)
1772 * @param width width of src/dst plane data
1773 * @param height height of src/dst plane data
1774 * @param linesize size of a single line of plane data, including padding
1775 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1777 static av_always_inline
1778 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1779 uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
1780 int x_off, int y_off, int block_w, int block_h,
1781 int width, int height, ptrdiff_t linesize,
1782 vp8_mc_func mc_func[3][3])
1784 uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1787 int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1788 int my = mv->y & 7, my_idx = subpel_idx[0][my];
1790 x_off += mv->x >> 3;
1791 y_off += mv->y >> 3;
1794 src1 += y_off * linesize + x_off;
1795 src2 += y_off * linesize + x_off;
1796 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1797 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1798 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1799 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1800 src1 - my_idx * linesize - mx_idx,
1801 EDGE_EMU_LINESIZE, linesize,
1802 block_w + subpel_idx[1][mx],
1803 block_h + subpel_idx[1][my],
1804 x_off - mx_idx, y_off - my_idx, width, height);
1805 src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1806 mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1808 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1809 src2 - my_idx * linesize - mx_idx,
1810 EDGE_EMU_LINESIZE, linesize,
1811 block_w + subpel_idx[1][mx],
1812 block_h + subpel_idx[1][my],
1813 x_off - mx_idx, y_off - my_idx, width, height);
1814 src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1815 mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1817 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1818 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1821 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1822 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1823 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1827 static av_always_inline
1828 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1829 ThreadFrame *ref_frame, int x_off, int y_off,
1830 int bx_off, int by_off, int block_w, int block_h,
1831 int width, int height, VP56mv *mv)
1836 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1837 ref_frame, mv, x_off + bx_off, y_off + by_off,
1838 block_w, block_h, width, height, s->linesize,
1839 s->put_pixels_tab[block_w == 8]);
1842 if (s->profile == 3) {
1843 /* this block only applies to VP8; it is safe to check
1844 * only the profile, as VP7 profile <= 1 */
1856 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1857 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1858 &uvmv, x_off + bx_off, y_off + by_off,
1859 block_w, block_h, width, height, s->uvlinesize,
1860 s->put_pixels_tab[1 + (block_w == 4)]);
1863 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1864 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1865 static av_always_inline
1866 void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1869 /* Don't prefetch refs that haven't been used very often this frame. */
1870 if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
1871 int x_off = mb_x << 4, y_off = mb_y << 4;
1872 int mx = (mb->mv.x >> 2) + x_off + 8;
1873 int my = (mb->mv.y >> 2) + y_off;
1874 uint8_t **src = s->framep[ref]->tf.f->data;
1875 int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1876 /* For threading, a ff_thread_await_progress here might be useful, but
1877 * it actually slows down the decoder. Since a bad prefetch doesn't
1878 * generate bad decoder output, we don't run it here. */
1879 s->vdsp.prefetch(src[0] + off, s->linesize, 4);
1880 off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1881 s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1886 * Apply motion vectors to prediction buffer, chapter 18.
1888 static av_always_inline
1889 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1890 VP8Macroblock *mb, int mb_x, int mb_y)
1892 int x_off = mb_x << 4, y_off = mb_y << 4;
1893 int width = 16 * s->mb_width, height = 16 * s->mb_height;
1894 ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1895 VP56mv *bmv = mb->bmv;
1897 switch (mb->partitioning) {
1898 case VP8_SPLITMVMODE_NONE:
1899 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1900 0, 0, 16, 16, width, height, &mb->mv);
1902 case VP8_SPLITMVMODE_4x4: {
1907 for (y = 0; y < 4; y++) {
1908 for (x = 0; x < 4; x++) {
1909 vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
1910 ref, &bmv[4 * y + x],
1911 4 * x + x_off, 4 * y + y_off, 4, 4,
1912 width, height, s->linesize,
1913 s->put_pixels_tab[2]);
1922 for (y = 0; y < 2; y++) {
1923 for (x = 0; x < 2; x++) {
1924 uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x +
1925 mb->bmv[2 * y * 4 + 2 * x + 1].x +
1926 mb->bmv[(2 * y + 1) * 4 + 2 * x ].x +
1927 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
1928 uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y +
1929 mb->bmv[2 * y * 4 + 2 * x + 1].y +
1930 mb->bmv[(2 * y + 1) * 4 + 2 * x ].y +
1931 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
1932 uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
1933 uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
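/* Each chroma MV is the average of the four luma MVs it covers; the +2 and
 * FF_SIGNBIT terms round the result to the nearest integer. */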
1934 if (s->profile == 3) {
1938 vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
1939 dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
1940 &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
1941 width, height, s->uvlinesize,
1942 s->put_pixels_tab[2]);
1947 case VP8_SPLITMVMODE_16x8:
1948 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1949 0, 0, 16, 8, width, height, &bmv[0]);
1950 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1951 0, 8, 16, 8, width, height, &bmv[1]);
1953 case VP8_SPLITMVMODE_8x16:
1954 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1955 0, 0, 8, 16, width, height, &bmv[0]);
1956 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1957 8, 0, 8, 16, width, height, &bmv[1]);
1959 case VP8_SPLITMVMODE_8x8:
1960 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1961 0, 0, 8, 8, width, height, &bmv[0]);
1962 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1963 8, 0, 8, 8, width, height, &bmv[1]);
1964 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1965 0, 8, 8, 8, width, height, &bmv[2]);
1966 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1967 8, 8, 8, 8, width, height, &bmv[3]);
1972 static av_always_inline
1973 void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
1977 if (mb->mode != MODE_I4x4) {
1978 uint8_t *y_dst = dst[0];
1979 for (y = 0; y < 4; y++) {
1980 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
1982 if (nnz4 & ~0x01010101) {
1983 for (x = 0; x < 4; x++) {
1984 if ((uint8_t) nnz4 == 1)
1985 s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
1988 else if ((uint8_t) nnz4 > 1)
1989 s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
1997 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
2000 y_dst += 4 * s->linesize;
2004 for (ch = 0; ch < 2; ch++) {
2005 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
2007 uint8_t *ch_dst = dst[1 + ch];
2008 if (nnz4 & ~0x01010101) {
2009 for (y = 0; y < 2; y++) {
2010 for (x = 0; x < 2; x++) {
2011 if ((uint8_t) nnz4 == 1)
2012 s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
2013 td->block[4 + ch][(y << 1) + x],
2015 else if ((uint8_t) nnz4 > 1)
2016 s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
2017 td->block[4 + ch][(y << 1) + x],
2021 goto chroma_idct_end;
2023 ch_dst += 4 * s->uvlinesize;
2026 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
2034 static av_always_inline
2035 void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
2036 VP8FilterStrength *f, int is_vp7)
2038 int interior_limit, filter_level;
2040 if (s->segmentation.enabled) {
2041 filter_level = s->segmentation.filter_level[mb->segment];
2042 if (!s->segmentation.absolute_vals)
2043 filter_level += s->filter.level;
2045 filter_level = s->filter.level;
2047 if (s->lf_delta.enabled) {
2048 filter_level += s->lf_delta.ref[mb->ref_frame];
2049 filter_level += s->lf_delta.mode[mb->mode];
2052 filter_level = av_clip_uintp2(filter_level, 6);
2054 interior_limit = filter_level;
2055 if (s->filter.sharpness) {
2056 interior_limit >>= (s->filter.sharpness + 3) >> 2;
2057 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2059 interior_limit = FFMAX(interior_limit, 1);
2061 f->filter_level = filter_level;
2062 f->inner_limit = interior_limit;
2063 f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2064 mb->mode == VP8_MVMODE_SPLIT;
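/* Inner (sub-block) edges are always filtered for VP7; for VP8 they are
 * skipped only when the macroblock has no coded coefficients and is
 * predicted as a single 16x16 block (neither I4x4 nor split MVs). */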
2067 static av_always_inline
2068 void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2069 int mb_x, int mb_y, int is_vp7)
2071 int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2072 int filter_level = f->filter_level;
2073 int inner_limit = f->inner_limit;
2074 int inner_filter = f->inner_filter;
2075 int linesize = s->linesize;
2076 int uvlinesize = s->uvlinesize;
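    /* High-edge-variance threshold, indexed by [s->keyframe][filter_level];
     * per the spec, key frames use slightly lower thresholds than inter
     * frames at the same filter level. */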
2077 static const uint8_t hev_thresh_lut[2][64] = {
2078 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2079 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2080 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2082 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2083 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2084 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2092 bedge_lim_y = filter_level;
2093 bedge_lim_uv = filter_level * 2;
2094 mbedge_lim = filter_level + 2;
2097 bedge_lim_uv = filter_level * 2 + inner_limit;
2098 mbedge_lim = bedge_lim_y + 4;
2101 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
2104 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2105 mbedge_lim, inner_limit, hev_thresh);
2106 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2107 mbedge_lim, inner_limit, hev_thresh);
2110 #define H_LOOP_FILTER_16Y_INNER(cond) \
2111 if (cond && inner_filter) { \
2112 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2113 bedge_lim_y, inner_limit, \
2115 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2116 bedge_lim_y, inner_limit, \
2118 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2119 bedge_lim_y, inner_limit, \
2121 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2122 uvlinesize, bedge_lim_uv, \
2123 inner_limit, hev_thresh); \
2126 H_LOOP_FILTER_16Y_INNER(!is_vp7)
2129 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2130 mbedge_lim, inner_limit, hev_thresh);
2131 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2132 mbedge_lim, inner_limit, hev_thresh);
2136 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
2137 linesize, bedge_lim_y,
2138 inner_limit, hev_thresh);
2139 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
2140 linesize, bedge_lim_y,
2141 inner_limit, hev_thresh);
2142 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2143 linesize, bedge_lim_y,
2144 inner_limit, hev_thresh);
2145 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2146 dst[2] + 4 * uvlinesize,
2147 uvlinesize, bedge_lim_uv,
2148 inner_limit, hev_thresh);
2151 H_LOOP_FILTER_16Y_INNER(is_vp7)
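/* filter_mb_simple is the luma-only "simple" loop filter: it adjusts just
 * the pixel on each side of an edge (p0/q0) using a single edge limit
 * (mbedge_lim for the MB border, bedge_lim for the inner edges), with no
 * high-edge-variance or interior-limit checks inside the filter itself. */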
2154 static av_always_inline
2155 void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2158 int mbedge_lim, bedge_lim;
2159 int filter_level = f->filter_level;
2160 int inner_limit = f->inner_limit;
2161 int inner_filter = f->inner_filter;
2162 int linesize = s->linesize;
2167 bedge_lim = 2 * filter_level + inner_limit;
2168 mbedge_lim = bedge_lim + 4;
2171 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2173 s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2174 s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2175 s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2179 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2181 s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2182 s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2183 s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
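/* MARGIN is one macroblock (16 pixels) expressed in the quarter-pel units
 * used for motion vectors; the mv_min/mv_max bounds below clamp vectors so
 * that predicted blocks reference at most 16 pixels outside the picture,
 * which edge emulation then handles.
 *
 * vp78_decode_mv_mb_modes decodes macroblock modes and motion vectors for
 * the whole frame in a separate up-front pass; it is used when the
 * macroblock info is laid out for the entire frame (s->mb_layout == 1), so
 * the row decoders can run without the mode parser. */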
2187 #define MARGIN (16 << 2)
2188 static av_always_inline
2189 void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2190 VP8Frame *prev_frame, int is_vp7)
2192 VP8Context *s = avctx->priv_data;
2195 s->mv_min.y = -MARGIN;
2196 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2197 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
2198 VP8Macroblock *mb = s->macroblocks_base +
2199 ((s->mb_width + 1) * (mb_y + 1) + 1);
2200 int mb_xy = mb_y * s->mb_width;
2202 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2204 s->mv_min.x = -MARGIN;
2205 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2206 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2208 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2209 DC_PRED * 0x01010101);
2210 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2211 prev_frame && prev_frame->seg_map ?
2212 prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
2221 static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2222 VP8Frame *prev_frame)
2224 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
2227 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2228 VP8Frame *prev_frame)
2230 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
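/* Sliced-threading synchronisation: each row job publishes its progress as
 * (mb_y << 16) | mb_x. check_thread_pos(td, otd, x, y) blocks on otd's
 * mutex/condition until that job's published position reaches
 * (y << 16) | x; for example, check_thread_pos(td, prev_td, mb_x + 1,
 * mb_y - 1) waits until the row above has passed the macroblock diagonally
 * ahead. update_pos() stores the new position and broadcasts the condition
 * variable when a neighbouring job may be waiting on it. Without thread
 * support both macros compile to no-ops. */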
2234 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
2236 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
2237 if (otd->thread_mb_pos < tmp) { \
2238 pthread_mutex_lock(&otd->lock); \
2239 td->wait_mb_pos = tmp; \
2241 if (otd->thread_mb_pos >= tmp) \
2243 pthread_cond_wait(&otd->cond, &otd->lock); \
2245 td->wait_mb_pos = INT_MAX; \
2246 pthread_mutex_unlock(&otd->lock); \
2250 #define update_pos(td, mb_y, mb_x) \
2252 int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
2253 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2255 int is_null = !next_td || !prev_td; \
2256 int pos_check = (is_null) ? 1 \
2257 : (next_td != td && \
2258 pos >= next_td->wait_mb_pos) || \
2260 pos >= prev_td->wait_mb_pos); \
2261 td->thread_mb_pos = pos; \
2262 if (sliced_threading && pos_check) { \
2263 pthread_mutex_lock(&td->lock); \
2264 pthread_cond_broadcast(&td->cond); \
2265 pthread_mutex_unlock(&td->lock); \
2269 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0)
2270 #define update_pos(td, mb_y, mb_x) while(0)
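/* decode_mb_row_no_filter handles one macroblock row: read the MB mode
 * (unless it was pre-parsed by vp78_decode_mv_mb_modes), decode the
 * coefficients from this row's coefficient partition, run intra or inter
 * prediction, apply the inverse transforms, and record the per-MB filter
 * strength. The loop filter itself is deferred to filter_mb_row so that
 * neighbouring rows can be decoded concurrently. */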
2273 static av_always_inline void decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2274 int jobnr, int threadnr, int is_vp7)
2276 VP8Context *s = avctx->priv_data;
2277 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
2278 int mb_y = td->thread_mb_pos >> 16;
2279 int mb_x, mb_xy = mb_y * s->mb_width;
2280 int num_jobs = s->num_jobs;
2281 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
2282 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
2285 curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2286 curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
2287 curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
2292 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2293 if (mb_y == s->mb_height - 1)
2296 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2297 if (s->mb_layout == 1)
2298 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2300 // Make sure the previous frame has read its segmentation map,
2301 // if we re-use the same map.
2302 if (prev_frame && s->segmentation.enabled &&
2303 !s->segmentation.update_map)
2304 ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
2305 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2306 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2307 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2310 if (!is_vp7 || mb_y == 0)
2311 memset(td->left_nnz, 0, sizeof(td->left_nnz));
2313 s->mv_min.x = -MARGIN;
2314 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2316 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2317 // Wait for the thread decoding the row above to get far enough ahead
2317 // (one MB column and one row for VP8, two for VP7).
2318 if (prev_td != td) {
2319 if (threadnr != 0) {
2320 check_thread_pos(td, prev_td,
2321 mb_x + (is_vp7 ? 2 : 1),
2322 mb_y - (is_vp7 ? 2 : 1));
2324 check_thread_pos(td, prev_td,
2325 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2326 mb_y - (is_vp7 ? 2 : 1));
2330 s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2332 s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2333 dst[2] - dst[1], 2);
2336 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2337 prev_frame && prev_frame->seg_map ?
2338 prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2340 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2343 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
2345 if (mb->mode <= MODE_I4x4)
2346 intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2348 inter_predict(s, td, dst, mb, mb_x, mb_y);
2350 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2353 idct_mb(s, td, dst, mb);
2355 AV_ZERO64(td->left_nnz);
2356 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2358 /* Reset DC block predictors if they would exist
2359 * if the mb had coefficients */
2360 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2361 td->left_nnz[8] = 0;
2362 s->top_nnz[mb_x][8] = 0;
2366 if (s->deblock_filter)
2367 filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
2369 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
2370 if (s->filter.simple)
2371 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2372 NULL, NULL, s->linesize, 0, 1);
2374 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2375 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2378 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
2386 if (mb_x == s->mb_width + 1) {
2387 update_pos(td, mb_y, s->mb_width + 3);
2389 update_pos(td, mb_y, mb_x);
2394 static void vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2395 int jobnr, int threadnr)
2397 decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
2400 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2401 int jobnr, int threadnr)
2403 decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
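/* filter_mb_row is the second pass over a decoded row: before the loop
 * filter modifies the pixels, each macroblock's bottom edge is saved into
 * s->top_border (backup_mb_border) so that intra prediction in the row
 * below can use the unfiltered reconstruction; then either the simple or
 * the normal filter is applied per macroblock. With several jobs the
 * border backup has already been done in decode_mb_row_no_filter. */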
2406 static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
2407 int jobnr, int threadnr, int is_vp7)
2409 VP8Context *s = avctx->priv_data;
2410 VP8ThreadData *td = &s->thread_data[threadnr];
2411 int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
2412 AVFrame *curframe = s->curframe->tf.f;
2414 VP8ThreadData *prev_td, *next_td;
2416 curframe->data[0] + 16 * mb_y * s->linesize,
2417 curframe->data[1] + 8 * mb_y * s->uvlinesize,
2418 curframe->data[2] + 8 * mb_y * s->uvlinesize
2421 if (s->mb_layout == 1)
2422 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2424 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2429 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2430 if (mb_y == s->mb_height - 1)
2433 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2435 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
2436 VP8FilterStrength *f = &td->filter_strength[mb_x];
2438 check_thread_pos(td, prev_td,
2439 (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
2441 if (next_td != &s->thread_data[0])
2442 check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
2444 if (num_jobs == 1) {
2445 if (s->filter.simple)
2446 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2447 NULL, NULL, s->linesize, 0, 1);
2449 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2450 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2453 if (s->filter.simple)
2454 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2456 filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
2461 update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
2465 static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
2466 int jobnr, int threadnr)
2468 filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
2471 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
2472 int jobnr, int threadnr)
2474 filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
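/* vp78_decode_mb_row_sliced is the per-job entry point for execute2():
 * job n processes rows n, n + num_jobs, n + 2*num_jobs, ... For every row
 * it runs the decode pass, then (if deblocking is enabled) the filter
 * pass, publishes completion of the row for the other slice jobs, and in
 * frame-threading mode reports progress so a decoder working on the next
 * frame can start referencing this one. */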
2477 static av_always_inline
2478 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2479 int threadnr, int is_vp7)
2481 VP8Context *s = avctx->priv_data;
2482 VP8ThreadData *td = &s->thread_data[jobnr];
2483 VP8ThreadData *next_td = NULL, *prev_td = NULL;
2484 VP8Frame *curframe = s->curframe;
2485 int mb_y, num_jobs = s->num_jobs;
2487 td->thread_nr = threadnr;
2488 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
2489 if (mb_y >= s->mb_height)
2491 td->thread_mb_pos = mb_y << 16;
2492 s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
2493 if (s->deblock_filter)
2494 s->filter_mb_row(avctx, tdata, jobnr, threadnr);
2495 update_pos(td, mb_y, INT_MAX & 0xFFFF);
2500 if (avctx->active_thread_type == FF_THREAD_FRAME)
2501 ff_thread_report_progress(&curframe->tf, mb_y, 0);
2507 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2508 int jobnr, int threadnr)
2510 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
2513 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2514 int jobnr, int threadnr)
2516 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
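/* vp78_decode_frame drives one packet: parse the VP7/VP8 frame header,
 * honour skip_frame / skip_loop_filter, pick a free frame buffer, rotate
 * the last/golden/altref references according to the header's update
 * flags, decode all rows via avctx->execute2(), restore the entropy
 * probabilities if the frame did not update them persistently, and output
 * the picture unless it is an invisible frame (show_frame == 0). */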
2520 static av_always_inline
2521 int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2522 AVPacket *avpkt, int is_vp7)
2524 VP8Context *s = avctx->priv_data;
2525 int ret, i, referenced, num_jobs;
2526 enum AVDiscard skip_thresh;
2527 VP8Frame *av_uninit(curframe), *prev_frame;
2530 ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2532 ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
2537 prev_frame = s->framep[VP56_FRAME_CURRENT];
2539 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2540 s->update_altref == VP56_FRAME_CURRENT;
2542 skip_thresh = !referenced ? AVDISCARD_NONREF
2543 : !s->keyframe ? AVDISCARD_NONKEY
2546 if (avctx->skip_frame >= skip_thresh) {
2548 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2551 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2553 // release no longer referenced frames
2554 for (i = 0; i < 5; i++)
2555 if (s->frames[i].tf.f->data[0] &&
2556 &s->frames[i] != prev_frame &&
2557 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2558 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
2559 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2560 vp8_release_frame(s, &s->frames[i]);
2562 curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
2565 avctx->colorspace = AVCOL_SPC_BT470BG;
2567 avctx->color_range = AVCOL_RANGE_JPEG;
2569 avctx->color_range = AVCOL_RANGE_MPEG;
2571 /* Given that arithmetic probabilities are updated every frame, it's quite
2572 * likely that the values we have on a random interframe are complete
2573 * junk if we didn't start decode on a keyframe. So just don't display
2574 * anything rather than junk. */
2575 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2576 !s->framep[VP56_FRAME_GOLDEN] ||
2577 !s->framep[VP56_FRAME_GOLDEN2])) {
2578 av_log(avctx, AV_LOG_WARNING,
2579 "Discarding interframe without a prior keyframe!\n");
2580 ret = AVERROR_INVALIDDATA;
2584 curframe->tf.f->key_frame = s->keyframe;
2585 curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2586 : AV_PICTURE_TYPE_P;
2587 if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
2590 // check if golden and altref are swapped
2591 if (s->update_altref != VP56_FRAME_NONE)
2592 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2594 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2596 if (s->update_golden != VP56_FRAME_NONE)
2597 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2599 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
2602 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2604 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2606 s->next_framep[VP56_FRAME_CURRENT] = curframe;
2608 if (avctx->codec->update_thread_context)
2609 ff_thread_finish_setup(avctx);
2611 s->linesize = curframe->tf.f->linesize[0];
2612 s->uvlinesize = curframe->tf.f->linesize[1];
2614 memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2615 /* Zero macroblock structures for top/top-left prediction
2616 * from outside the frame. */
2618 memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2619 (s->mb_width + 1) * sizeof(*s->macroblocks));
2620 if (!s->mb_layout && s->keyframe)
2621 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2623 memset(s->ref_count, 0, sizeof(s->ref_count));
2625 if (s->mb_layout == 1) {
2626 // Make sure the previous frame has read its segmentation map,
2627 // if we re-use the same map.
2628 if (prev_frame && s->segmentation.enabled &&
2629 !s->segmentation.update_map)
2630 ff_thread_await_progress(&prev_frame->tf, 1, 0);
2632 vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2634 vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2637 if (avctx->active_thread_type == FF_THREAD_FRAME)
2640 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2641 s->num_jobs = num_jobs;
2642 s->curframe = curframe;
2643 s->prev_frame = prev_frame;
2644 s->mv_min.y = -MARGIN;
2645 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2646 for (i = 0; i < MAX_THREADS; i++) {
2647 s->thread_data[i].thread_mb_pos = 0;
2648 s->thread_data[i].wait_mb_pos = INT_MAX;
2651 avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2654 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
2657 ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
2658 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2661 // if future frames don't use the updated probabilities,
2662 // reset them to the values we saved
2663 if (!s->update_probabilities)
2664 s->prob[0] = s->prob[1];
2666 if (!s->invisible) {
2667 if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
2674 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2678 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2681 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
2684 #if CONFIG_VP7_DECODER
2685 static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2688 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
2690 #endif /* CONFIG_VP7_DECODER */
2692 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2694 VP8Context *s = avctx->priv_data;
2700 vp8_decode_flush_impl(avctx, 1);
2701 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2702 av_frame_free(&s->frames[i].tf.f);
2707 static av_cold int vp8_init_frames(VP8Context *s)
2710 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2711 s->frames[i].tf.f = av_frame_alloc();
2712 if (!s->frames[i].tf.f)
2713 return AVERROR(ENOMEM);
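/* vp78_decode_init performs the setup shared by both decoders: YUV 4:2:0
 * output, common videodsp/vp78dsp initialisation, then the codec-specific
 * intra prediction tables, DSP routines and row decode/filter callbacks
 * for either VP7 or VP8, and finally the frame buffers. */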
2718 static av_always_inline
2719 int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2721 VP8Context *s = avctx->priv_data;
2725 s->vp7 = avctx->codec->id == AV_CODEC_ID_VP7;
2726 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2727 avctx->internal->allocate_progress = 1;
2729 ff_videodsp_init(&s->vdsp, 8);
2731 ff_vp78dsp_init(&s->vp8dsp);
2732 if (CONFIG_VP7_DECODER && is_vp7) {
2733 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2734 ff_vp7dsp_init(&s->vp8dsp);
2735 s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
2736 s->filter_mb_row = vp7_filter_mb_row;
2737 } else if (CONFIG_VP8_DECODER && !is_vp7) {
2738 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2739 ff_vp8dsp_init(&s->vp8dsp);
2740 s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
2741 s->filter_mb_row = vp8_filter_mb_row;
2744 /* does not change for VP8 */
2745 memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
2747 if ((ret = vp8_init_frames(s)) < 0) {
2748 ff_vp8_decode_free(avctx);
2755 #if CONFIG_VP7_DECODER
2756 static int vp7_decode_init(AVCodecContext *avctx)
2758 return vp78_decode_init(avctx, IS_VP7);
2760 #endif /* CONFIG_VP7_DECODER */
2762 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2764 return vp78_decode_init(avctx, IS_VP8);
2767 #if CONFIG_VP8_DECODER
2768 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2770 VP8Context *s = avctx->priv_data;
2775 if ((ret = vp8_init_frames(s)) < 0) {
2776 ff_vp8_decode_free(avctx);
2783 #define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
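/* REBASE translates a frame pointer from the source thread context's
 * frames[] array to the corresponding slot in this context's own array;
 * it is used below when copying the reference-frame pointers during
 * frame-threading context updates. */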
2785 static int vp8_decode_update_thread_context(AVCodecContext *dst,
2786 const AVCodecContext *src)
2788 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2791 if (s->macroblocks_base &&
2792 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2794 s->mb_width = s_src->mb_width;
2795 s->mb_height = s_src->mb_height;
2798 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2799 s->segmentation = s_src->segmentation;
2800 s->lf_delta = s_src->lf_delta;
2801 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2803 for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2804 if (s_src->frames[i].tf.f->data[0]) {
2805 int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
2811 s->framep[0] = REBASE(s_src->next_framep[0]);
2812 s->framep[1] = REBASE(s_src->next_framep[1]);
2813 s->framep[2] = REBASE(s_src->next_framep[2]);
2814 s->framep[3] = REBASE(s_src->next_framep[3]);
2818 #endif /* CONFIG_VP8_DECODER */
2820 #if CONFIG_VP7_DECODER
2821 AVCodec ff_vp7_decoder = {
2823 .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"),
2824 .type = AVMEDIA_TYPE_VIDEO,
2825 .id = AV_CODEC_ID_VP7,
2826 .priv_data_size = sizeof(VP8Context),
2827 .init = vp7_decode_init,
2828 .close = ff_vp8_decode_free,
2829 .decode = vp7_decode_frame,
2830 .capabilities = CODEC_CAP_DR1,
2831 .flush = vp8_decode_flush,
2833 #endif /* CONFIG_VP7_DECODER */
2835 #if CONFIG_VP8_DECODER
2836 AVCodec ff_vp8_decoder = {
2838 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2839 .type = AVMEDIA_TYPE_VIDEO,
2840 .id = AV_CODEC_ID_VP8,
2841 .priv_data_size = sizeof(VP8Context),
2842 .init = ff_vp8_decode_init,
2843 .close = ff_vp8_decode_free,
2844 .decode = ff_vp8_decode_frame,
2845 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2846 .flush = vp8_decode_flush,
2847 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2848 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2850 #endif /* CONFIG_VP8_DECODER */