2 * VP7/VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Fiona Glaser
7 * Copyright (C) 2012 Daniel Kang
8 * Copyright (C) 2014 Peter Ross
10 * This file is part of FFmpeg.
12 * FFmpeg is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "libavutil/imgutils.h"
31 #include "rectangle.h"
40 #if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
41 #define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
42 #elif CONFIG_VP7_DECODER
43 #define VPX(vp7, f) vp7_ ## f
44 #else // CONFIG_VP8_DECODER
45 #define VPX(vp7, f) vp8_ ## f
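/* e.g. in a build with both decoders, VPX(1, decode_frame_header) evaluates to
 * vp7_decode_frame_header and VPX(0, decode_frame_header) to
 * vp8_decode_frame_header; with a single decoder compiled in, the macro
 * collapses to that variant unconditionally, as above. */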
48 static void free_buffers(VP8Context *s)
52 for (i = 0; i < MAX_THREADS; i++) {
54 pthread_cond_destroy(&s->thread_data[i].cond);
55 pthread_mutex_destroy(&s->thread_data[i].lock);
57 av_freep(&s->thread_data[i].filter_strength);
59 av_freep(&s->thread_data);
60 av_freep(&s->macroblocks_base);
61 av_freep(&s->intra4x4_pred_mode_top);
62 av_freep(&s->top_nnz);
63 av_freep(&s->top_border);
65 s->macroblocks = NULL;
68 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
71 if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
72 ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
74 if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
75 ff_thread_release_buffer(s->avctx, &f->tf);
76 return AVERROR(ENOMEM);
81 static void vp8_release_frame(VP8Context *s, VP8Frame *f)
83 av_buffer_unref(&f->seg_map);
84 ff_thread_release_buffer(s->avctx, &f->tf);
87 #if CONFIG_VP8_DECODER
88 static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
92 vp8_release_frame(s, dst);
94 if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
97 !(dst->seg_map = av_buffer_ref(src->seg_map))) {
98 vp8_release_frame(s, dst);
99 return AVERROR(ENOMEM);
104 #endif /* CONFIG_VP8_DECODER */
106 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
108 VP8Context *s = avctx->priv_data;
111 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
112 vp8_release_frame(s, &s->frames[i]);
113 memset(s->framep, 0, sizeof(s->framep));
119 static void vp8_decode_flush(AVCodecContext *avctx)
121 vp8_decode_flush_impl(avctx, 0);
124 static VP8Frame *vp8_find_free_buffer(VP8Context *s)
126 VP8Frame *frame = NULL;
129 // find a free buffer
130 for (i = 0; i < 5; i++)
131 if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
132 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
133 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
134 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
135 frame = &s->frames[i];
139 av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
142 if (frame->tf.f->data[0])
143 vp8_release_frame(s, frame);
148 static av_always_inline
149 int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
151 AVCodecContext *avctx = s->avctx;
154 if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
155 height != s->avctx->height) {
156 vp8_decode_flush_impl(s->avctx, 1);
158 ret = ff_set_dimensions(s->avctx, width, height);
163 s->mb_width = (s->avctx->coded_width + 15) / 16;
164 s->mb_height = (s->avctx->coded_height + 15) / 16;
166 s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
167 FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1;
168 if (!s->mb_layout) { // Frame threading and one thread
169 s->macroblocks_base = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
170 sizeof(*s->macroblocks));
171 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
172 } else // Sliced threading
173 s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
174 sizeof(*s->macroblocks));
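/* Sizing example (640x480, so mb_width = 40 and mb_height = 30): the
 * frame-threading layout above allocates 40 + 2 * 30 + 1 = 101 VP8Macroblock
 * entries, while the sliced-threading layout allocates
 * (40 + 2) * (30 + 2) = 1344, i.e. the whole frame plus a one-macroblock
 * border. */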
175 s->top_nnz = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
176 s->top_border = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
177 s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));
179 if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
180 !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
182 return AVERROR(ENOMEM);
185 for (i = 0; i < MAX_THREADS; i++) {
186 s->thread_data[i].filter_strength =
187 av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
188 if (!s->thread_data[i].filter_strength) {
190 return AVERROR(ENOMEM);
193 pthread_mutex_init(&s->thread_data[i].lock, NULL);
194 pthread_cond_init(&s->thread_data[i].cond, NULL);
198 s->macroblocks = s->macroblocks_base + 1;
203 static int vp7_update_dimensions(VP8Context *s, int width, int height)
205 return update_dimensions(s, width, height, IS_VP7);
208 static int vp8_update_dimensions(VP8Context *s, int width, int height)
210 return update_dimensions(s, width, height, IS_VP8);
214 static void parse_segment_info(VP8Context *s)
216 VP56RangeCoder *c = &s->c;
219 s->segmentation.update_map = vp8_rac_get(c);
221 if (vp8_rac_get(c)) { // update segment feature data
222 s->segmentation.absolute_vals = vp8_rac_get(c);
224 for (i = 0; i < 4; i++)
225 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
227 for (i = 0; i < 4; i++)
228 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
230 if (s->segmentation.update_map)
231 for (i = 0; i < 3; i++)
232 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
235 static void update_lf_deltas(VP8Context *s)
237 VP56RangeCoder *c = &s->c;
240 for (i = 0; i < 4; i++) {
241 if (vp8_rac_get(c)) {
242 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
245 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
249 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
250 if (vp8_rac_get(c)) {
251 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
254 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
259 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
261 const uint8_t *sizes = buf;
264 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
266 buf += 3 * (s->num_coeff_partitions - 1);
267 buf_size -= 3 * (s->num_coeff_partitions - 1);
271 for (i = 0; i < s->num_coeff_partitions - 1; i++) {
272 int size = AV_RL24(sizes + 3 * i);
273 if (buf_size - size < 0)
276 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
280 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
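/* Example: a 2-bit value of 2 yields 1 << 2 = 4 coefficient partitions; the
 * first 3 * 3 = 9 bytes after the header carry the 24-bit little-endian sizes
 * of partitions 0-2, and the final partition simply spans whatever bytes
 * remain in the buffer, as set up above. */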
285 static void vp7_get_quants(VP8Context *s)
287 VP56RangeCoder *c = &s->c;
289 int yac_qi = vp8_rac_get_uint(c, 7);
290 int ydc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
291 int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
292 int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
293 int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
294 int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
296 s->qmat[0].luma_qmul[0] = vp7_ydc_qlookup[ydc_qi];
297 s->qmat[0].luma_qmul[1] = vp7_yac_qlookup[yac_qi];
298 s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
299 s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
300 s->qmat[0].chroma_qmul[0] = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
301 s->qmat[0].chroma_qmul[1] = vp7_yac_qlookup[uvac_qi];
304 static void vp8_get_quants(VP8Context *s)
306 VP56RangeCoder *c = &s->c;
309 int yac_qi = vp8_rac_get_uint(c, 7);
310 int ydc_delta = vp8_rac_get_sint(c, 4);
311 int y2dc_delta = vp8_rac_get_sint(c, 4);
312 int y2ac_delta = vp8_rac_get_sint(c, 4);
313 int uvdc_delta = vp8_rac_get_sint(c, 4);
314 int uvac_delta = vp8_rac_get_sint(c, 4);
316 for (i = 0; i < 4; i++) {
317 if (s->segmentation.enabled) {
318 base_qi = s->segmentation.base_quant[i];
319 if (!s->segmentation.absolute_vals)
324 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta, 7)];
325 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)];
326 s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)] * 2;
327 /* 101581>>16 is equivalent to 155/100 */
328 s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] * 101581 >> 16;
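/* worked out: 155 / 100 * 65536 = 101580.8, so "* 101581 >> 16" scales the
 * looked-up AC quantizer by roughly 1.55 using integer arithmetic only */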
329 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
330 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
332 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
333 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
338 * Determine which buffers golden and altref should be updated with after this frame.
339 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
341 * Intra frames update all 3 references
342 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
343 * If the update (golden|altref) flag is set, it's updated with the current frame
344 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
345 * If the flag is not set, the number read means:
347 * 1: VP56_FRAME_PREVIOUS
348 * 2: update golden with altref, or update altref with golden
350 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
352 VP56RangeCoder *c = &s->c;
355 return VP56_FRAME_CURRENT;
357 switch (vp8_rac_get_uint(c, 2)) {
359 return VP56_FRAME_PREVIOUS;
361 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
363 return VP56_FRAME_NONE;
366 static void vp78_reset_probability_tables(VP8Context *s)
369 for (i = 0; i < 4; i++)
370 for (j = 0; j < 16; j++)
371 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
372 sizeof(s->prob->token[i][j]));
375 static void vp78_update_probability_tables(VP8Context *s)
377 VP56RangeCoder *c = &s->c;
380 for (i = 0; i < 4; i++)
381 for (j = 0; j < 8; j++)
382 for (k = 0; k < 3; k++)
383 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
384 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
385 int prob = vp8_rac_get_uint(c, 8);
386 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
387 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
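/* one transmitted probability can cover several scan positions:
 * vp8_coeff_band_indexes[j] lists every position belonging to band j,
 * and each of them receives the same updated value */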
391 #define VP7_MVC_SIZE 17
392 #define VP8_MVC_SIZE 19
394 static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
397 VP56RangeCoder *c = &s->c;
401 for (i = 0; i < 4; i++)
402 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
404 for (i = 0; i < 3; i++)
405 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
407 // 17.2 MV probability update
408 for (i = 0; i < 2; i++)
409 for (j = 0; j < mvc_size; j++)
410 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
411 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
414 static void update_refs(VP8Context *s)
416 VP56RangeCoder *c = &s->c;
418 int update_golden = vp8_rac_get(c);
419 int update_altref = vp8_rac_get(c);
421 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
422 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
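/* Example: on an inter frame with the update-golden flag clear, a coded 2-bit
 * value of 1 means golden is refreshed from VP56_FRAME_PREVIOUS, 2 means it is
 * refreshed from altref (and vice versa for the altref call), and anything
 * else leaves it untouched (VP56_FRAME_NONE). */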
425 static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
429 for (j = 1; j < 3; j++) {
430 for (i = 0; i < height / 2; i++)
431 memcpy(dst->data[j] + i * dst->linesize[j],
432 src->data[j] + i * src->linesize[j], width / 2);
436 static void fade(uint8_t *dst, int dst_linesize,
437 const uint8_t *src, int src_linesize,
438 int width, int height,
442 for (j = 0; j < height; j++) {
443 for (i = 0; i < width; i++) {
444 uint8_t y = src[j * src_linesize + i];
445 dst[j * dst_linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
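/* e.g. with alpha = -10 and beta = -64, a source pixel of 200 becomes
 * av_clip_uint8(200 + ((200 * -64) >> 8) - 10)
 *     = av_clip_uint8(200 - 50 - 10) = 140 */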
450 static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
452 int alpha = (int8_t) vp8_rac_get_uint(c, 8);
453 int beta = (int8_t) vp8_rac_get_uint(c, 8);
456 if (!s->keyframe && (alpha || beta)) {
457 int width = s->mb_width * 16;
458 int height = s->mb_height * 16;
461 if (!s->framep[VP56_FRAME_PREVIOUS] ||
462 !s->framep[VP56_FRAME_GOLDEN]) {
463 av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
464 return AVERROR_INVALIDDATA;
468 src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
470 /* preserve the golden frame, write a new previous frame */
471 if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
472 s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
473 if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
476 dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
478 copy_chroma(dst, src, width, height);
481 fade(dst->data[0], dst->linesize[0],
482 src->data[0], src->linesize[0],
483 width, height, alpha, beta);
489 static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
491 VP56RangeCoder *c = &s->c;
492 int part1_size, hscale, vscale, i, j, ret;
493 int width = s->avctx->width;
494 int height = s->avctx->height;
496 s->profile = (buf[0] >> 1) & 7;
497 if (s->profile > 1) {
498 avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
499 return AVERROR_INVALIDDATA;
502 s->keyframe = !(buf[0] & 1);
504 part1_size = AV_RL24(buf) >> 4;
506 if (buf_size < 4 - s->profile + part1_size) {
507 av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed: %d\n", buf_size, 4 - s->profile + part1_size);
508 return AVERROR_INVALIDDATA;
511 buf += 4 - s->profile;
512 buf_size -= 4 - s->profile;
514 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
516 ff_vp56_init_range_decoder(c, buf, part1_size);
518 buf_size -= part1_size;
520 /* A. Dimension information (keyframes only) */
522 width = vp8_rac_get_uint(c, 12);
523 height = vp8_rac_get_uint(c, 12);
524 hscale = vp8_rac_get_uint(c, 2);
525 vscale = vp8_rac_get_uint(c, 2);
526 if (hscale || vscale)
527 avpriv_request_sample(s->avctx, "Upscaling");
529 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
530 vp78_reset_probability_tables(s);
531 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
532 sizeof(s->prob->pred16x16));
533 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
534 sizeof(s->prob->pred8x8c));
535 for (i = 0; i < 2; i++)
536 memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
537 sizeof(vp7_mv_default_prob[i]));
538 memset(&s->segmentation, 0, sizeof(s->segmentation));
539 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
540 memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
543 if (s->keyframe || s->profile > 0)
544 memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));
546 /* B. Decoding information for all four macroblock-level features */
547 for (i = 0; i < 4; i++) {
548 s->feature_enabled[i] = vp8_rac_get(c);
549 if (s->feature_enabled[i]) {
550 s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);
552 for (j = 0; j < 3; j++)
553 s->feature_index_prob[i][j] =
554 vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
556 if (vp7_feature_value_size[s->profile][i])
557 for (j = 0; j < 4; j++)
558 s->feature_value[i][j] =
559 vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
563 s->segmentation.enabled = 0;
564 s->segmentation.update_map = 0;
565 s->lf_delta.enabled = 0;
567 s->num_coeff_partitions = 1;
568 ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
570 if (!s->macroblocks_base || /* first frame */
571 width != s->avctx->width || height != s->avctx->height ||
572 (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
573 if ((ret = vp7_update_dimensions(s, width, height)) < 0)
577 /* C. Dequantization indices */
580 /* D. Golden frame update flag (a Flag) for interframes only */
582 s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
583 s->sign_bias[VP56_FRAME_GOLDEN] = 0;
587 s->update_probabilities = 1;
590 if (s->profile > 0) {
591 s->update_probabilities = vp8_rac_get(c);
592 if (!s->update_probabilities)
593 s->prob[1] = s->prob[0];
596 s->fade_present = vp8_rac_get(c);
599 /* E. Fading information for previous frame */
600 if (s->fade_present && vp8_rac_get(c)) {
601 if ((ret = vp7_fade_frame(s ,c)) < 0)
605 /* F. Loop filter type */
607 s->filter.simple = vp8_rac_get(c);
609 /* G. DCT coefficient ordering specification */
611 for (i = 1; i < 16; i++)
612 s->prob[0].scan[i] = zigzag_scan[vp8_rac_get_uint(c, 4)];
614 /* H. Loop filter levels */
616 s->filter.simple = vp8_rac_get(c);
617 s->filter.level = vp8_rac_get_uint(c, 6);
618 s->filter.sharpness = vp8_rac_get_uint(c, 3);
620 /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
621 vp78_update_probability_tables(s);
623 s->mbskip_enabled = 0;
625 /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
627 s->prob->intra = vp8_rac_get_uint(c, 8);
628 s->prob->last = vp8_rac_get_uint(c, 8);
629 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
635 static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
637 VP56RangeCoder *c = &s->c;
638 int header_size, hscale, vscale, ret;
639 int width = s->avctx->width;
640 int height = s->avctx->height;
642 s->keyframe = !(buf[0] & 1);
643 s->profile = (buf[0]>>1) & 7;
644 s->invisible = !(buf[0] & 0x10);
645 header_size = AV_RL24(buf) >> 5;
650 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
653 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
654 sizeof(s->put_pixels_tab));
655 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
656 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
657 sizeof(s->put_pixels_tab));
659 if (header_size > buf_size - 7 * s->keyframe) {
660 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
661 return AVERROR_INVALIDDATA;
665 if (AV_RL24(buf) != 0x2a019d) {
666 av_log(s->avctx, AV_LOG_ERROR,
667 "Invalid start code 0x%x\n", AV_RL24(buf));
668 return AVERROR_INVALIDDATA;
670 width = AV_RL16(buf + 3) & 0x3fff;
671 height = AV_RL16(buf + 5) & 0x3fff;
672 hscale = buf[4] >> 6;
673 vscale = buf[6] >> 6;
677 if (hscale || vscale)
678 avpriv_request_sample(s->avctx, "Upscaling");
680 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
681 vp78_reset_probability_tables(s);
682 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
683 sizeof(s->prob->pred16x16));
684 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
685 sizeof(s->prob->pred8x8c));
686 memcpy(s->prob->mvc, vp8_mv_default_prob,
687 sizeof(s->prob->mvc));
688 memset(&s->segmentation, 0, sizeof(s->segmentation));
689 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
692 ff_vp56_init_range_decoder(c, buf, header_size);
694 buf_size -= header_size;
697 s->colorspace = vp8_rac_get(c);
699 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
700 s->fullrange = vp8_rac_get(c);
703 if ((s->segmentation.enabled = vp8_rac_get(c)))
704 parse_segment_info(s);
706 s->segmentation.update_map = 0; // FIXME: move this to some init function?
708 s->filter.simple = vp8_rac_get(c);
709 s->filter.level = vp8_rac_get_uint(c, 6);
710 s->filter.sharpness = vp8_rac_get_uint(c, 3);
712 if ((s->lf_delta.enabled = vp8_rac_get(c)))
716 if (setup_partitions(s, buf, buf_size)) {
717 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
718 return AVERROR_INVALIDDATA;
721 if (!s->macroblocks_base || /* first frame */
722 width != s->avctx->width || height != s->avctx->height ||
723 (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
724 if ((ret = vp8_update_dimensions(s, width, height)) < 0)
731 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
732 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
735 // if we aren't saving this frame's probabilities for future frames,
736 // make a copy of the current probabilities
737 if (!(s->update_probabilities = vp8_rac_get(c)))
738 s->prob[1] = s->prob[0];
740 s->update_last = s->keyframe || vp8_rac_get(c);
742 vp78_update_probability_tables(s);
744 if ((s->mbskip_enabled = vp8_rac_get(c)))
745 s->prob->mbskip = vp8_rac_get_uint(c, 8);
748 s->prob->intra = vp8_rac_get_uint(c, 8);
749 s->prob->last = vp8_rac_get_uint(c, 8);
750 s->prob->golden = vp8_rac_get_uint(c, 8);
751 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
757 static av_always_inline
758 void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
760 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
761 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
765 * Motion vector coding, 17.1.
767 static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
771 if (vp56_rac_get_prob_branchy(c, p[0])) {
774 for (i = 0; i < 3; i++)
775 x += vp56_rac_get_prob(c, p[9 + i]) << i;
776 for (i = (vp7 ? 7 : 9); i > 3; i--)
777 x += vp56_rac_get_prob(c, p[9 + i]) << i;
778 if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
782 const uint8_t *ps = p + 2;
783 bit = vp56_rac_get_prob(c, *ps);
786 bit = vp56_rac_get_prob(c, *ps);
789 x += vp56_rac_get_prob(c, *ps);
792 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
795 static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
797 return read_mv_component(c, p, 1);
800 static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
802 return read_mv_component(c, p, 0);
805 static av_always_inline
806 const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
809 return vp7_submv_prob;
812 return vp8_submv_prob[4 - !!left];
814 return vp8_submv_prob[2];
815 return vp8_submv_prob[1 - !!left];
819 * Split motion vector prediction, 16.4.
820 * @returns the number of motion vectors parsed (2, 4 or 16)
822 static av_always_inline
823 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
824 int layout, int is_vp7)
828 VP8Macroblock *top_mb;
829 VP8Macroblock *left_mb = &mb[-1];
830 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
831 const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
833 VP56mv *left_mv = left_mb->bmv;
834 VP56mv *cur_mv = mb->bmv;
836 if (!layout) // layout is inlined, s->mb_layout is not
839 top_mb = &mb[-s->mb_width - 1];
840 mbsplits_top = vp8_mbsplits[top_mb->partitioning];
841 top_mv = top_mb->bmv;
843 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
844 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
845 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
847 part_idx = VP8_SPLITMVMODE_8x8;
849 part_idx = VP8_SPLITMVMODE_4x4;
852 num = vp8_mbsplit_count[part_idx];
853 mbsplits_cur = vp8_mbsplits[part_idx],
854 firstidx = vp8_mbfirstidx[part_idx];
855 mb->partitioning = part_idx;
857 for (n = 0; n < num; n++) {
859 uint32_t left, above;
860 const uint8_t *submv_prob;
863 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
865 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
867 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
869 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
871 submv_prob = get_submv_prob(left, above, is_vp7);
873 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
874 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
875 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
876 mb->bmv[n].y = mb->mv.y +
877 read_mv_component(c, s->prob->mvc[0], is_vp7);
878 mb->bmv[n].x = mb->mv.x +
879 read_mv_component(c, s->prob->mvc[1], is_vp7);
881 AV_ZERO32(&mb->bmv[n]);
884 AV_WN32A(&mb->bmv[n], above);
887 AV_WN32A(&mb->bmv[n], left);
895 * The vp7 reference decoder uses a padding macroblock column (added to right
896 * edge of the frame) to guard against illegal macroblock offsets. The
897 * algorithm has bugs that permit offsets to straddle the padding column.
898 * This function replicates those bugs.
900 * @param[out] edge_x macroblock x address
901 * @param[out] edge_y macroblock y address
903 * @return whether the macroblock offset is legal (boolean)
905 static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
906 int xoffset, int yoffset, int boundary,
907 int *edge_x, int *edge_y)
909 int vwidth = mb_width + 1;
910 int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
911 if (new < boundary || new % vwidth == vwidth - 1)
913 *edge_y = new / vwidth;
914 *edge_x = new % vwidth;
918 static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
920 return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
923 static av_always_inline
924 void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
925 int mb_x, int mb_y, int layout)
927 VP8Macroblock *mb_edge[12];
928 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
929 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
932 uint8_t cnt[3] = { 0 };
933 VP56RangeCoder *c = &s->c;
936 AV_ZERO32(&near_mv[0]);
937 AV_ZERO32(&near_mv[1]);
938 AV_ZERO32(&near_mv[2]);
940 for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
941 const VP7MVPred * pred = &vp7_mv_pred[i];
944 if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
945 pred->yoffset, !s->profile, &edge_x, &edge_y)) {
946 VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
947 ? s->macroblocks_base + 1 + edge_x +
948 (s->mb_width + 1) * (edge_y + 1)
949 : s->macroblocks + edge_x +
950 (s->mb_height - edge_y - 1) * 2;
951 uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
953 if (AV_RN32A(&near_mv[CNT_NEAREST])) {
954 if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
956 } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
957 if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
961 AV_WN32A(&near_mv[CNT_NEAR], mv);
965 AV_WN32A(&near_mv[CNT_NEAREST], mv);
974 cnt[idx] += vp7_mv_pred[i].score;
977 mb->partitioning = VP8_SPLITMVMODE_NONE;
979 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
980 mb->mode = VP8_MVMODE_MV;
982 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
984 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
986 if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
987 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
989 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR] ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
991 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
992 mb->mode = VP8_MVMODE_SPLIT;
993 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
995 mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
996 mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
1000 mb->mv = near_mv[CNT_NEAR];
1001 mb->bmv[0] = mb->mv;
1004 mb->mv = near_mv[CNT_NEAREST];
1005 mb->bmv[0] = mb->mv;
1008 mb->mode = VP8_MVMODE_ZERO;
1010 mb->bmv[0] = mb->mv;
1014 static av_always_inline
1015 void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
1016 int mb_x, int mb_y, int layout)
1018 VP8Macroblock *mb_edge[3] = { 0 /* top */,
1021 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
1022 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1024 int cur_sign_bias = s->sign_bias[mb->ref_frame];
1025 int8_t *sign_bias = s->sign_bias;
1027 uint8_t cnt[4] = { 0 };
1028 VP56RangeCoder *c = &s->c;
1030 if (!layout) { // layout is inlined (s->mb_layout is not)
1031 mb_edge[0] = mb + 2;
1032 mb_edge[2] = mb + 1;
1034 mb_edge[0] = mb - s->mb_width - 1;
1035 mb_edge[2] = mb - s->mb_width - 2;
1038 AV_ZERO32(&near_mv[0]);
1039 AV_ZERO32(&near_mv[1]);
1040 AV_ZERO32(&near_mv[2]);
1042 /* Process MB on top, left and top-left */
1043 #define MV_EDGE_CHECK(n) \
1045 VP8Macroblock *edge = mb_edge[n]; \
1046 int edge_ref = edge->ref_frame; \
1047 if (edge_ref != VP56_FRAME_CURRENT) { \
1048 uint32_t mv = AV_RN32A(&edge->mv); \
1050 if (cur_sign_bias != sign_bias[edge_ref]) { \
1051 /* SWAR negate of the values in mv. */ \
1053 mv = ((mv & 0x7fff7fff) + \
1054 0x00010001) ^ (mv & 0x80008000); \
1056 if (!n || mv != AV_RN32A(&near_mv[idx])) \
1057 AV_WN32A(&near_mv[++idx], mv); \
1058 cnt[idx] += 1 + (n != 2); \
1060 cnt[CNT_ZERO] += 1 + (n != 2); \
1068 mb->partitioning = VP8_SPLITMVMODE_NONE;
1069 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
1070 mb->mode = VP8_MVMODE_MV;
1072 /* If we have three distinct MVs, merge first and last if they're the same */
1073 if (cnt[CNT_SPLITMV] &&
1074 AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
1075 cnt[CNT_NEAREST] += 1;
1077 /* Swap near and nearest if necessary */
1078 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
1079 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
1080 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
1083 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
1084 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
1085 /* Choose the best mv out of 0,0 and the nearest mv */
1086 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
1087 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
1088 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
1089 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
1091 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
1092 mb->mode = VP8_MVMODE_SPLIT;
1093 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
1095 mb->mv.y += vp8_read_mv_component(c, s->prob->mvc[0]);
1096 mb->mv.x += vp8_read_mv_component(c, s->prob->mvc[1]);
1097 mb->bmv[0] = mb->mv;
1100 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
1101 mb->bmv[0] = mb->mv;
1104 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
1105 mb->bmv[0] = mb->mv;
1108 mb->mode = VP8_MVMODE_ZERO;
1110 mb->bmv[0] = mb->mv;
1114 static av_always_inline
1115 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
1116 int mb_x, int keyframe, int layout)
1118 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1121 VP8Macroblock *mb_top = mb - s->mb_width - 1;
1122 memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
1127 uint8_t *const left = s->intra4x4_pred_mode_left;
1129 top = mb->intra4x4_pred_mode_top;
1131 top = s->intra4x4_pred_mode_top + 4 * mb_x;
1132 for (y = 0; y < 4; y++) {
1133 for (x = 0; x < 4; x++) {
1135 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
1136 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
1137 left[y] = top[x] = *intra4x4;
1143 for (i = 0; i < 16; i++)
1144 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
1145 vp8_pred4x4_prob_inter);
1149 static av_always_inline
1150 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1151 uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
1153 VP56RangeCoder *c = &s->c;
1154 const char *vp7_feature_name[] = { "q-index",
1156 "partial-golden-update",
1161 for (i = 0; i < 4; i++) {
1162 if (s->feature_enabled[i]) {
1163 if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
1164 int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
1165 s->feature_index_prob[i]);
1166 av_log(s->avctx, AV_LOG_WARNING,
1167 "Feature %s present in macroblock (value 0x%x)\n",
1168 vp7_feature_name[i], s->feature_value[i][index]);
1172 } else if (s->segmentation.update_map) {
1173 int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
1174 *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
1175 } else if (s->segmentation.enabled)
1176 *segment = ref ? *ref : *segment;
1177 mb->segment = *segment;
1179 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
1182 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
1183 vp8_pred16x16_prob_intra);
1185 if (mb->mode == MODE_I4x4) {
1186 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
1188 const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
1189 : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
1191 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
1193 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
1194 AV_WN32A(s->intra4x4_pred_mode_left, modes);
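/* multiplying the single 4x4 mode byte by 0x01010101 broadcasts it into all
 * four bytes, filling the four top/left prediction slots with one 32-bit
 * store */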
1197 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1198 vp8_pred8x8c_prob_intra);
1199 mb->ref_frame = VP56_FRAME_CURRENT;
1200 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
1202 if (vp56_rac_get_prob_branchy(c, s->prob->last))
1204 (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
1205 : VP56_FRAME_GOLDEN;
1207 mb->ref_frame = VP56_FRAME_PREVIOUS;
1208 s->ref_count[mb->ref_frame - 1]++;
1210 // motion vectors, 16.3
1212 vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
1214 vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
1217 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
1219 if (mb->mode == MODE_I4x4)
1220 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
1222 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1224 mb->ref_frame = VP56_FRAME_CURRENT;
1225 mb->partitioning = VP8_SPLITMVMODE_NONE;
1226 AV_ZERO32(&mb->bmv[0]);
1231 * @param r arithmetic bitstream reader context
1232 * @param block destination for block coefficients
1233 * @param probs probabilities to use when reading trees from the bitstream
1234 * @param i initial coeff index, 0 unless a separate DC block is coded
1235 * @param qmul array holding the dc/ac dequant factor at position 0/1
1237 * @return 0 if no coeffs were decoded
1238 * otherwise, the index of the last coeff decoded plus one
1240 static av_always_inline
1241 int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
1242 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1243 int i, uint8_t *token_prob, int16_t qmul[2],
1244 const uint8_t scan[16], int vp7)
1246 VP56RangeCoder c = *r;
1251 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
1255 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
1257 break; // invalid input; blocks should end with EOB
1258 token_prob = probs[i][0];
1264 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
1266 token_prob = probs[i + 1][1];
1268 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
1269 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
1271 coeff += vp56_rac_get_prob(&c, token_prob[5]);
1275 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
1276 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
1277 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
1278 } else { // DCT_CAT2
1280 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
1281 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
1283 } else { // DCT_CAT3 and up
1284 int a = vp56_rac_get_prob(&c, token_prob[8]);
1285 int b = vp56_rac_get_prob(&c, token_prob[9 + a]);
1286 int cat = (a << 1) + b;
1287 coeff = 3 + (8 << cat);
1288 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
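/* a and b combine into cat = 0..3 (DCT_CAT3..DCT_CAT6), whose base values are
 * 3 + (8 << cat) = 11, 19, 35 and 67; the category's extra bits are then
 * added on top */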
1291 token_prob = probs[i + 1][2];
1293 block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
1300 static av_always_inline
1301 int inter_predict_dc(int16_t block[16], int16_t pred[2])
1303 int16_t dc = block[0];
1311 if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1312 block[0] = pred[0] = dc;
1317 block[0] = pred[0] = dc;
1323 static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
1325 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1326 int i, uint8_t *token_prob,
1328 const uint8_t scan[16])
1330 return decode_block_coeffs_internal(r, block, probs, i,
1331 token_prob, qmul, scan, IS_VP7);
1334 #ifndef vp8_decode_block_coeffs_internal
1335 static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
1337 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1338 int i, uint8_t *token_prob,
1341 return decode_block_coeffs_internal(r, block, probs, i,
1342 token_prob, qmul, zigzag_scan, IS_VP8);
1347 * @param c arithmetic bitstream reader context
1348 * @param block destination for block coefficients
1349 * @param probs probabilities to use when reading trees from the bitstream
1350 * @param i initial coeff index, 0 unless a separate DC block is coded
1351 * @param zero_nhood the initial prediction context for number of surrounding
1352 * all-zero blocks (only left/top, so 0-2)
1353 * @param qmul array holding the dc/ac dequant factor at position 0/1
1354 * @param scan scan pattern (VP7 only)
1356 * @return 0 if no coeffs were decoded
1357 * otherwise, the index of the last coeff decoded plus one
1359 static av_always_inline
1360 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
1361 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1362 int i, int zero_nhood, int16_t qmul[2],
1363 const uint8_t scan[16], int vp7)
1365 uint8_t *token_prob = probs[i][zero_nhood];
1366 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
1368 return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1369 token_prob, qmul, scan)
1370 : vp8_decode_block_coeffs_internal(c, block, probs, i,
1374 static av_always_inline
1375 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
1376 VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
1379 int i, x, y, luma_start = 0, luma_ctx = 3;
1380 int nnz_pred, nnz, nnz_total = 0;
1381 int segment = mb->segment;
1384 if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1385 nnz_pred = t_nnz[8] + l_nnz[8];
1387 // decode DC values and do hadamard
1388 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
1389 nnz_pred, s->qmat[segment].luma_dc_qmul,
1390 zigzag_scan, is_vp7);
1391 l_nnz[8] = t_nnz[8] = !!nnz;
1393 if (is_vp7 && mb->mode > MODE_I4x4) {
1394 nnz |= inter_predict_dc(td->block_dc,
1395 s->inter_dc_pred[mb->ref_frame - 1]);
1402 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1404 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
1411 for (y = 0; y < 4; y++)
1412 for (x = 0; x < 4; x++) {
1413 nnz_pred = l_nnz[y] + t_nnz[x];
1414 nnz = decode_block_coeffs(c, td->block[y][x],
1415 s->prob->token[luma_ctx],
1416 luma_start, nnz_pred,
1417 s->qmat[segment].luma_qmul,
1418 s->prob[0].scan, is_vp7);
1419 /* nnz+block_dc may be one more than the actual last index,
1420 * but we don't care */
1421 td->non_zero_count_cache[y][x] = nnz + block_dc;
1422 t_nnz[x] = l_nnz[y] = !!nnz;
1427 // TODO: what to do about dimensions? 2nd dim for luma is x,
1428 // but for chroma it's (y<<1)|x
1429 for (i = 4; i < 6; i++)
1430 for (y = 0; y < 2; y++)
1431 for (x = 0; x < 2; x++) {
1432 nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
1433 nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
1434 s->prob->token[2], 0, nnz_pred,
1435 s->qmat[segment].chroma_qmul,
1436 s->prob[0].scan, is_vp7);
1437 td->non_zero_count_cache[i][(y << 1) + x] = nnz;
1438 t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
1442 // if there were no coded coeffs despite the macroblock not being marked skip,
1443 // we MUST not do the inner loop filter and should not do IDCT
1444 // Since skip isn't used for bitstream prediction, just manually set it.
1449 static av_always_inline
1450 void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1451 uint8_t *src_cb, uint8_t *src_cr,
1452 int linesize, int uvlinesize, int simple)
1454 AV_COPY128(top_border, src_y + 15 * linesize);
1456 AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1457 AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
1461 static av_always_inline
1462 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
1463 uint8_t *src_cr, int linesize, int uvlinesize, int mb_x,
1464 int mb_y, int mb_width, int simple, int xchg)
1466 uint8_t *top_border_m1 = top_border - 32; // for TL prediction
1468 src_cb -= uvlinesize;
1469 src_cr -= uvlinesize;
1471 #define XCHG(a, b, xchg) \
1479 XCHG(top_border_m1 + 8, src_y - 8, xchg);
1480 XCHG(top_border, src_y, xchg);
1481 XCHG(top_border + 8, src_y + 8, 1);
1482 if (mb_x < mb_width - 1)
1483 XCHG(top_border + 32, src_y + 16, 1);
1485 // only copy chroma for normal loop filter
1486 // or to initialize the top row to 127
1487 if (!simple || !mb_y) {
1488 XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1489 XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1490 XCHG(top_border + 16, src_cb, 1);
1491 XCHG(top_border + 24, src_cr, 1);
1495 static av_always_inline
1496 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1499 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1501 return mb_y ? mode : LEFT_DC_PRED8x8;
1504 static av_always_inline
1505 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1508 return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1510 return mb_y ? mode : HOR_PRED8x8;
1513 static av_always_inline
1514 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1518 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1520 return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1522 return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1523 case PLANE_PRED8x8: /* TM */
1524 return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
1529 static av_always_inline
1530 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1533 return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1535 return mb_y ? mode : HOR_VP8_PRED;
1539 static av_always_inline
1540 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1541 int *copy_buf, int vp7)
1545 if (!mb_x && mb_y) {
1550 case DIAG_DOWN_LEFT_PRED:
1551 case VERT_LEFT_PRED:
1552 return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1560 return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1562 return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1563 case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1564 * as 16x16/8x8 DC */
1565 case DIAG_DOWN_RIGHT_PRED:
1566 case VERT_RIGHT_PRED:
1575 static av_always_inline
1576 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1577 VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
1579 int x, y, mode, nnz;
1582 /* for the first row, we need to run xchg_mb_border to init the top edge
1583 * to 127; otherwise, skip it if we aren't going to deblock */
1584 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1585 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1586 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1587 s->filter.simple, 1);
1589 if (mb->mode < MODE_I4x4) {
1590 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
1591 s->hpc.pred16x16[mode](dst[0], s->linesize);
1593 uint8_t *ptr = dst[0];
1594 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1595 const uint8_t lo = is_vp7 ? 128 : 127;
1596 const uint8_t hi = is_vp7 ? 128 : 129;
1597 uint8_t tr_top[4] = { lo, lo, lo, lo };
1599 // all blocks on the right edge of the macroblock use the bottom edge of
1600 // the top macroblock for their topright edge
1601 uint8_t *tr_right = ptr - s->linesize + 16;
1603 // if we're on the right edge of the frame, said edge is extended
1604 // from the top macroblock
1605 if (mb_y && mb_x == s->mb_width - 1) {
1606 tr = tr_right[-1] * 0x01010101u;
1607 tr_right = (uint8_t *) &tr;
1611 AV_ZERO128(td->non_zero_count_cache);
1613 for (y = 0; y < 4; y++) {
1614 uint8_t *topright = ptr + 4 - s->linesize;
1615 for (x = 0; x < 4; x++) {
1616 int copy = 0, linesize = s->linesize;
1617 uint8_t *dst = ptr + 4 * x;
1618 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5 * 8];
1620 if ((y == 0 || x == 3) && mb_y == 0) {
1623 topright = tr_right;
1625 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
1626 mb_y + y, &copy, is_vp7);
1628 dst = copy_dst + 12;
1632 AV_WN32A(copy_dst + 4, lo * 0x01010101U);
1634 AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
1638 copy_dst[3] = ptr[4 * x - s->linesize - 1];
1647 copy_dst[11] = ptr[4 * x - 1];
1648 copy_dst[19] = ptr[4 * x + s->linesize - 1];
1649 copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
1650 copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
1653 s->hpc.pred4x4[mode](dst, topright, linesize);
1655 AV_COPY32(ptr + 4 * x, copy_dst + 12);
1656 AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20);
1657 AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
1658 AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
1661 nnz = td->non_zero_count_cache[y][x];
1664 s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
1665 td->block[y][x], s->linesize);
1667 s->vp8dsp.vp8_idct_add(ptr + 4 * x,
1668 td->block[y][x], s->linesize);
1673 ptr += 4 * s->linesize;
1678 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
1679 mb_x, mb_y, is_vp7);
1680 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1681 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1683 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1684 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1685 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1686 s->filter.simple, 0);
1689 static const uint8_t subpel_idx[3][8] = {
1690 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1691 // also function pointer index
1692 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1693 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
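// e.g. mx = 4: 2 extra pixels are needed to the left of the block and 3 to
// the right, so subpel_idx[1][4] = 2 + 3 = 5 extra pixels must be fetched
// for the edge-emulation path in the MC functions below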
1699 * @param s VP8 decoding context
1700 * @param dst target buffer for block data at block position
1701 * @param ref reference picture buffer at origin (0, 0)
1702 * @param mv motion vector (relative to block position) to get pixel data from
1703 * @param x_off horizontal position of block from origin (0, 0)
1704 * @param y_off vertical position of block from origin (0, 0)
1705 * @param block_w width of block (16, 8 or 4)
1706 * @param block_h height of block (always same as block_w)
1707 * @param width width of src/dst plane data
1708 * @param height height of src/dst plane data
1709 * @param linesize size of a single line of plane data, including padding
1710 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1712 static av_always_inline
1713 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1714 ThreadFrame *ref, const VP56mv *mv,
1715 int x_off, int y_off, int block_w, int block_h,
1716 int width, int height, ptrdiff_t linesize,
1717 vp8_mc_func mc_func[3][3])
1719 uint8_t *src = ref->f->data[0];
1722 int src_linesize = linesize;
1724 int mx = (mv->x << 1) & 7, mx_idx = subpel_idx[0][mx];
1725 int my = (mv->y << 1) & 7, my_idx = subpel_idx[0][my];
1727 x_off += mv->x >> 2;
1728 y_off += mv->y >> 2;
1731 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1732 src += y_off * linesize + x_off;
1733 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1734 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1735 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1736 src - my_idx * linesize - mx_idx,
1737 EDGE_EMU_LINESIZE, linesize,
1738 block_w + subpel_idx[1][mx],
1739 block_h + subpel_idx[1][my],
1740 x_off - mx_idx, y_off - my_idx,
1742 src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1743 src_linesize = EDGE_EMU_LINESIZE;
1745 mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
1747 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1748 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1749 linesize, block_h, 0, 0);
1754 * chroma MC function
1756 * @param s VP8 decoding context
1757 * @param dst1 target buffer for block data at block position (U plane)
1758 * @param dst2 target buffer for block data at block position (V plane)
1759 * @param ref reference picture buffer at origin (0, 0)
1760 * @param mv motion vector (relative to block position) to get pixel data from
1761 * @param x_off horizontal position of block from origin (0, 0)
1762 * @param y_off vertical position of block from origin (0, 0)
1763 * @param block_w width of block (16, 8 or 4)
1764 * @param block_h height of block (always same as block_w)
1765 * @param width width of src/dst plane data
1766 * @param height height of src/dst plane data
1767 * @param linesize size of a single line of plane data, including padding
1768 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1770 static av_always_inline
1771 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1772 uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
1773 int x_off, int y_off, int block_w, int block_h,
1774 int width, int height, ptrdiff_t linesize,
1775 vp8_mc_func mc_func[3][3])
1777 uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1780 int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1781 int my = mv->y & 7, my_idx = subpel_idx[0][my];
1783 x_off += mv->x >> 3;
1784 y_off += mv->y >> 3;
1787 src1 += y_off * linesize + x_off;
1788 src2 += y_off * linesize + x_off;
1789 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1790 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1791 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1792 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1793 src1 - my_idx * linesize - mx_idx,
1794 EDGE_EMU_LINESIZE, linesize,
1795 block_w + subpel_idx[1][mx],
1796 block_h + subpel_idx[1][my],
1797 x_off - mx_idx, y_off - my_idx, width, height);
1798 src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1799 mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1801 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1802 src2 - my_idx * linesize - mx_idx,
1803 EDGE_EMU_LINESIZE, linesize,
1804 block_w + subpel_idx[1][mx],
1805 block_h + subpel_idx[1][my],
1806 x_off - mx_idx, y_off - my_idx, width, height);
1807 src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1808 mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1810 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1811 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1814 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1815 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1816 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1820 static av_always_inline
1821 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1822 ThreadFrame *ref_frame, int x_off, int y_off,
1823 int bx_off, int by_off, int block_w, int block_h,
1824 int width, int height, VP56mv *mv)
1829 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1830 ref_frame, mv, x_off + bx_off, y_off + by_off,
1831 block_w, block_h, width, height, s->linesize,
1832 s->put_pixels_tab[block_w == 8]);
1835 if (s->profile == 3) {
1836 /* this block only applies to VP8; it is safe to check
1837 * only the profile, as VP7 profile <= 1 */
1849 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1850 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1851 &uvmv, x_off + bx_off, y_off + by_off,
1852 block_w, block_h, width, height, s->uvlinesize,
1853 s->put_pixels_tab[1 + (block_w == 4)]);
1856 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1857 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1858 static av_always_inline
1859 void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1862 /* Don't prefetch refs that haven't been used very often this frame. */
1863 if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
1864 int x_off = mb_x << 4, y_off = mb_y << 4;
1865 int mx = (mb->mv.x >> 2) + x_off + 8;
1866 int my = (mb->mv.y >> 2) + y_off;
1867 uint8_t **src = s->framep[ref]->tf.f->data;
1868 int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1869 /* For threading, a ff_thread_await_progress here might be useful, but
1870 * it actually slows down the decoder. Since a bad prefetch doesn't
1871 * generate bad decoder output, we don't run it here. */
1872 s->vdsp.prefetch(src[0] + off, s->linesize, 4);
1873 off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1874 s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1879 * Apply motion vectors to prediction buffer, chapter 18.
1881 static av_always_inline
1882 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1883 VP8Macroblock *mb, int mb_x, int mb_y)
1885 int x_off = mb_x << 4, y_off = mb_y << 4;
1886 int width = 16 * s->mb_width, height = 16 * s->mb_height;
1887 ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1888 VP56mv *bmv = mb->bmv;
1890 switch (mb->partitioning) {
1891 case VP8_SPLITMVMODE_NONE:
1892 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1893 0, 0, 16, 16, width, height, &mb->mv);
1895 case VP8_SPLITMVMODE_4x4: {
1900 for (y = 0; y < 4; y++) {
1901 for (x = 0; x < 4; x++) {
1902 vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
1903 ref, &bmv[4 * y + x],
1904 4 * x + x_off, 4 * y + y_off, 4, 4,
1905 width, height, s->linesize,
1906 s->put_pixels_tab[2]);
1915 for (y = 0; y < 2; y++) {
1916 for (x = 0; x < 2; x++) {
1917 uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x +
1918 mb->bmv[2 * y * 4 + 2 * x + 1].x +
1919 mb->bmv[(2 * y + 1) * 4 + 2 * x ].x +
1920 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
1921 uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y +
1922 mb->bmv[2 * y * 4 + 2 * x + 1].y +
1923 mb->bmv[(2 * y + 1) * 4 + 2 * x ].y +
1924 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
1925 uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
1926 uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
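/* uvmv now holds the sum of the four luma MVs; adding 2 plus the sign bit
 * (0 or -1) before the >> 2 rounds the average to nearest, halfway cases
 * away from zero: a sum of 6 gives 2, a sum of -6 gives -2 */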
1927 if (s->profile == 3) {
1931 vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
1932 dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
1933 &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
1934 width, height, s->uvlinesize,
1935 s->put_pixels_tab[2]);
1940 case VP8_SPLITMVMODE_16x8:
1941 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1942 0, 0, 16, 8, width, height, &bmv[0]);
1943 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1944 0, 8, 16, 8, width, height, &bmv[1]);
1946 case VP8_SPLITMVMODE_8x16:
1947 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1948 0, 0, 8, 16, width, height, &bmv[0]);
1949 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1950 8, 0, 8, 16, width, height, &bmv[1]);
1952 case VP8_SPLITMVMODE_8x8:
1953 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1954 0, 0, 8, 8, width, height, &bmv[0]);
1955 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1956 8, 0, 8, 8, width, height, &bmv[1]);
1957 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1958 0, 8, 8, 8, width, height, &bmv[2]);
1959 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1960 8, 8, 8, 8, width, height, &bmv[3]);
1965 static av_always_inline
1966 void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
1970 if (mb->mode != MODE_I4x4) {
1971 uint8_t *y_dst = dst[0];
1972 for (y = 0; y < 4; y++) {
1973 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
1975 if (nnz4 & ~0x01010101) {
1976 for (x = 0; x < 4; x++) {
1977 if ((uint8_t) nnz4 == 1)
1978 s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
1981 else if ((uint8_t) nnz4 > 1)
1982 s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
1990 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1993 y_dst += 4 * s->linesize;
1997 for (ch = 0; ch < 2; ch++) {
1998 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
2000 uint8_t *ch_dst = dst[1 + ch];
2001 if (nnz4 & ~0x01010101) {
2002 for (y = 0; y < 2; y++) {
2003 for (x = 0; x < 2; x++) {
2004 if ((uint8_t) nnz4 == 1)
2005 s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
2006 td->block[4 + ch][(y << 1) + x],
2008 else if ((uint8_t) nnz4 > 1)
2009 s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
2010 td->block[4 + ch][(y << 1) + x],
2014 goto chroma_idct_end;
2016 ch_dst += 4 * s->uvlinesize;
2019 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
2027 static av_always_inline
2028 void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
2029 VP8FilterStrength *f, int is_vp7)
2031 int interior_limit, filter_level;
2033 if (s->segmentation.enabled) {
2034 filter_level = s->segmentation.filter_level[mb->segment];
2035 if (!s->segmentation.absolute_vals)
2036 filter_level += s->filter.level;
2038 filter_level = s->filter.level;
2040 if (s->lf_delta.enabled) {
2041 filter_level += s->lf_delta.ref[mb->ref_frame];
2042 filter_level += s->lf_delta.mode[mb->mode];
2045 filter_level = av_clip_uintp2(filter_level, 6);
2047 interior_limit = filter_level;
2048 if (s->filter.sharpness) {
2049 interior_limit >>= (s->filter.sharpness + 3) >> 2;
2050 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2052 interior_limit = FFMAX(interior_limit, 1);
2054 f->filter_level = filter_level;
2055 f->inner_limit = interior_limit;
2056 f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2057 mb->mode == VP8_MVMODE_SPLIT;
2060 static av_always_inline
2061 void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2062 int mb_x, int mb_y, int is_vp7)
2064 int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2065 int filter_level = f->filter_level;
2066 int inner_limit = f->inner_limit;
2067 int inner_filter = f->inner_filter;
2068 int linesize = s->linesize;
2069 int uvlinesize = s->uvlinesize;
2070 static const uint8_t hev_thresh_lut[2][64] = {
2071 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2072 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2073 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2075 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2076 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2077 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2085 bedge_lim_y = filter_level;
2086 bedge_lim_uv = filter_level * 2;
2087 mbedge_lim = filter_level + 2;
2090 bedge_lim_uv = filter_level * 2 + inner_limit;
2091 mbedge_lim = bedge_lim_y + 4;
2094 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
2097 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2098 mbedge_lim, inner_limit, hev_thresh);
2099 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2100 mbedge_lim, inner_limit, hev_thresh);
2103 #define H_LOOP_FILTER_16Y_INNER(cond) \
2104 if (cond && inner_filter) { \
2105 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2106 bedge_lim_y, inner_limit, \
2108 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2109 bedge_lim_y, inner_limit, \
2111 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2112 bedge_lim_y, inner_limit, \
2114 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2115 uvlinesize, bedge_lim_uv, \
2116 inner_limit, hev_thresh); \
2119 H_LOOP_FILTER_16Y_INNER(!is_vp7)
2122 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2123 mbedge_lim, inner_limit, hev_thresh);
2124 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2125 mbedge_lim, inner_limit, hev_thresh);
2129 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
2130 linesize, bedge_lim_y,
2131 inner_limit, hev_thresh);
2132 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
2133 linesize, bedge_lim_y,
2134 inner_limit, hev_thresh);
2135 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2136 linesize, bedge_lim_y,
2137 inner_limit, hev_thresh);
2138 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2139 dst[2] + 4 * uvlinesize,
2140 uvlinesize, bedge_lim_uv,
2141 inner_limit, hev_thresh);
2144 H_LOOP_FILTER_16Y_INNER(is_vp7)
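/* Simple-filter variant of the above: only the luma plane is filtered, with
 * edge limits derived directly from the filter level and inner limit; chroma
 * and the high-edge-variance test are not used. */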
2147 static av_always_inline
2148 void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2151 int mbedge_lim, bedge_lim;
2152 int filter_level = f->filter_level;
2153 int inner_limit = f->inner_limit;
2154 int inner_filter = f->inner_filter;
2155 int linesize = s->linesize;
2160 bedge_lim = 2 * filter_level + inner_limit;
2161 mbedge_lim = bedge_lim + 4;
2164 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2166 s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2167 s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2168 s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2172 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2174 s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2175 s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2176 s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
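/* MARGIN is how far clamped motion vectors may point outside the visible
 * frame: 64 in the units used for mv_min/mv_max, i.e. one 16-pixel
 * macroblock.  The pre-pass below is only run for mb_layout == 1 (see
 * vp78_decode_frame) and decodes just the per-MB modes, motion vectors and
 * segment ids ahead of the actual row decoding. */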
2180 #define MARGIN (16 << 2)
2181 static av_always_inline
2182 void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2183 VP8Frame *prev_frame, int is_vp7)
2185 VP8Context *s = avctx->priv_data;
2188 s->mv_min.y = -MARGIN;
2189 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2190 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
2191 VP8Macroblock *mb = s->macroblocks_base +
2192 ((s->mb_width + 1) * (mb_y + 1) + 1);
2193 int mb_xy = mb_y * s->mb_width;
2195 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2197 s->mv_min.x = -MARGIN;
2198 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2199 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2201 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2202 DC_PRED * 0x01010101);
2203 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2204 prev_frame && prev_frame->seg_map ?
2205 prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
2214 static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2215 VP8Frame *prev_frame)
2217 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
2220 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2221 VP8Frame *prev_frame)
2223 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
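/* Sliced-threading synchronization: each thread publishes its progress as
 * (mb_y << 16) | mb_x in thread_mb_pos.  check_thread_pos() blocks on the
 * other thread's condition variable until that thread has reached the
 * requested macroblock, and update_pos() publishes the caller's position and
 * wakes any thread waiting for it.  In builds without threading support both
 * macros compile to the empty definitions below. */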
2227 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
2229 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
2230 if (otd->thread_mb_pos < tmp) { \
2231 pthread_mutex_lock(&otd->lock); \
2232 td->wait_mb_pos = tmp; \
2234 if (otd->thread_mb_pos >= tmp) \
2236 pthread_cond_wait(&otd->cond, &otd->lock); \
2238 td->wait_mb_pos = INT_MAX; \
2239 pthread_mutex_unlock(&otd->lock); \
2243 #define update_pos(td, mb_y, mb_x) \
2245 int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
2246 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2248 int is_null = !next_td || !prev_td; \
2249 int pos_check = (is_null) ? 1 \
2250 : (next_td != td && \
2251 pos >= next_td->wait_mb_pos) || \
2253 pos >= prev_td->wait_mb_pos); \
2254 td->thread_mb_pos = pos; \
2255 if (sliced_threading && pos_check) { \
2256 pthread_mutex_lock(&td->lock); \
2257 pthread_cond_broadcast(&td->cond); \
2258 pthread_mutex_unlock(&td->lock); \
2262 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
2263 #define update_pos(td, mb_y, mb_x)
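/* Decode one macroblock row without loop filtering: per-MB modes and
 * coefficients are read from the appropriate coefficient partition, intra or
 * inter prediction and the IDCT are applied, and the filter strength for
 * each MB is stored for the later filter pass.  The check_thread_pos() /
 * update_pos() calls keep rows decoded by neighbouring threads sufficiently
 * far ahead or behind. */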
2266 static av_always_inline void decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2267 int jobnr, int threadnr, int is_vp7)
2269 VP8Context *s = avctx->priv_data;
2270 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
2271 int mb_y = td->thread_mb_pos >> 16;
2272 int mb_x, mb_xy = mb_y * s->mb_width;
2273 int num_jobs = s->num_jobs;
2274 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
2275 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
2278 curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2279 curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
2280 curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
2285 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2286 if (mb_y == s->mb_height - 1)
2289 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2290 if (s->mb_layout == 1)
2291 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2293 // Make sure the previous frame has read its segmentation map,
2294 // if we re-use the same map.
2295 if (prev_frame && s->segmentation.enabled &&
2296 !s->segmentation.update_map)
2297 ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
2298 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2299 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2300 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2303 if (!is_vp7 || mb_y == 0)
2304 memset(td->left_nnz, 0, sizeof(td->left_nnz));
2306 s->mv_min.x = -MARGIN;
2307 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2309 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2310 // Wait for the previous thread to read mb_x+1 and reach mb_y-1 (VP8), or mb_x+2 and mb_y-2 (VP7).
2311 if (prev_td != td) {
2312 if (threadnr != 0) {
2313 check_thread_pos(td, prev_td,
2314 mb_x + (is_vp7 ? 2 : 1),
2315 mb_y - (is_vp7 ? 2 : 1));
2317 check_thread_pos(td, prev_td,
2318 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2319 mb_y - (is_vp7 ? 2 : 1));
2323 s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2325 s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2326 dst[2] - dst[1], 2);
2329 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2330 prev_frame && prev_frame->seg_map ?
2331 prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2333 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2336 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
2338 if (mb->mode <= MODE_I4x4)
2339 intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2341 inter_predict(s, td, dst, mb, mb_x, mb_y);
2343 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2346 idct_mb(s, td, dst, mb);
2348 AV_ZERO64(td->left_nnz);
2349 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2351 /* Reset DC block predictors that would exist
2352 * had this mb been coded with coefficients */
2353 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2354 td->left_nnz[8] = 0;
2355 s->top_nnz[mb_x][8] = 0;
2359 if (s->deblock_filter)
2360 filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
2362 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
2363 if (s->filter.simple)
2364 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2365 NULL, NULL, s->linesize, 0, 1);
2367 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2368 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2371 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
2379 if (mb_x == s->mb_width + 1) {
2380 update_pos(td, mb_y, s->mb_width + 3);
2382 update_pos(td, mb_y, mb_x);
2387 static void vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2388 int jobnr, int threadnr)
2390 decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
2393 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2394 int jobnr, int threadnr)
2396 decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
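/* Loop-filter one previously decoded macroblock row.  The bottom border of
 * the row is saved with backup_mb_border() so the row below can use it as
 * its top border, and either the simple or the normal filter is applied
 * depending on the frame header. */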
2399 static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
2400 int jobnr, int threadnr, int is_vp7)
2402 VP8Context *s = avctx->priv_data;
2403 VP8ThreadData *td = &s->thread_data[threadnr];
2404 int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
2405 AVFrame *curframe = s->curframe->tf.f;
2407 VP8ThreadData *prev_td, *next_td;
2409 curframe->data[0] + 16 * mb_y * s->linesize,
2410 curframe->data[1] + 8 * mb_y * s->uvlinesize,
2411 curframe->data[2] + 8 * mb_y * s->uvlinesize
2414 if (s->mb_layout == 1)
2415 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2417 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2422 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2423 if (mb_y == s->mb_height - 1)
2426 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2428 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
2429 VP8FilterStrength *f = &td->filter_strength[mb_x];
2431 check_thread_pos(td, prev_td,
2432 (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
2434 if (next_td != &s->thread_data[0])
2435 check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
2437 if (num_jobs == 1) {
2438 if (s->filter.simple)
2439 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2440 NULL, NULL, s->linesize, 0, 1);
2442 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2443 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2446 if (s->filter.simple)
2447 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2449 filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
2454 update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
2458 static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
2459 int jobnr, int threadnr)
2461 filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
2464 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
2465 int jobnr, int threadnr)
2467 filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
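/* Slice job entry point: job 'jobnr' handles rows jobnr, jobnr + num_jobs,
 * ... of the frame, running the row decode and, when the deblocking filter
 * is enabled, the filter pass, and reporting progress for frame-threaded
 * consumers. */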
2470 static av_always_inline
2471 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2472 int threadnr, int is_vp7)
2474 VP8Context *s = avctx->priv_data;
2475 VP8ThreadData *td = &s->thread_data[jobnr];
2476 VP8ThreadData *next_td = NULL, *prev_td = NULL;
2477 VP8Frame *curframe = s->curframe;
2478 int mb_y, num_jobs = s->num_jobs;
2480 td->thread_nr = threadnr;
2481 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
2482 if (mb_y >= s->mb_height)
2484 td->thread_mb_pos = mb_y << 16;
2485 s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
2486 if (s->deblock_filter)
2487 s->filter_mb_row(avctx, tdata, jobnr, threadnr);
2488 update_pos(td, mb_y, INT_MAX & 0xFFFF);
2493 if (avctx->active_thread_type == FF_THREAD_FRAME)
2494 ff_thread_report_progress(&curframe->tf, mb_y, 0);
2500 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2501 int jobnr, int threadnr)
2503 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
2506 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2507 int jobnr, int threadnr)
2509 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
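/* Common VP7/VP8 frame decoding: parse the frame header, drop the frame
 * early if it falls below the skip threshold, pick a free frame buffer,
 * rotate the previous/golden/altref references according to the header's
 * update flags, decode all macroblock rows through avctx->execute2(), and
 * finally output the frame unless it is marked invisible. */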
2513 static av_always_inline
2514 int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2515 AVPacket *avpkt, int is_vp7)
2517 VP8Context *s = avctx->priv_data;
2518 int ret, i, referenced, num_jobs;
2519 enum AVDiscard skip_thresh;
2520 VP8Frame *av_uninit(curframe), *prev_frame;
2523 ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2525 ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
2530 prev_frame = s->framep[VP56_FRAME_CURRENT];
2532 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2533 s->update_altref == VP56_FRAME_CURRENT;
2535 skip_thresh = !referenced ? AVDISCARD_NONREF
2536 : !s->keyframe ? AVDISCARD_NONKEY
2539 if (avctx->skip_frame >= skip_thresh) {
2541 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2544 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2546 // release no longer referenced frames
2547 for (i = 0; i < 5; i++)
2548 if (s->frames[i].tf.f->data[0] &&
2549 &s->frames[i] != prev_frame &&
2550 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2551 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
2552 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2553 vp8_release_frame(s, &s->frames[i]);
2555 curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
2558 avctx->colorspace = AVCOL_SPC_BT470BG;
2560 avctx->color_range = AVCOL_RANGE_JPEG;
2562 avctx->color_range = AVCOL_RANGE_MPEG;
2564 /* Given that arithmetic probabilities are updated every frame, it's quite
2565 * likely that the values we have on a random interframe are complete
2566 * junk if we didn't start decode on a keyframe. So just don't display
2567 * anything rather than junk. */
2568 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2569 !s->framep[VP56_FRAME_GOLDEN] ||
2570 !s->framep[VP56_FRAME_GOLDEN2])) {
2571 av_log(avctx, AV_LOG_WARNING,
2572 "Discarding interframe without a prior keyframe!\n");
2573 ret = AVERROR_INVALIDDATA;
2577 curframe->tf.f->key_frame = s->keyframe;
2578 curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2579 : AV_PICTURE_TYPE_P;
2580 if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
2583 // check if golden and altref are swapped
2584 if (s->update_altref != VP56_FRAME_NONE)
2585 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2587 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2589 if (s->update_golden != VP56_FRAME_NONE)
2590 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2592 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
2595 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2597 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2599 s->next_framep[VP56_FRAME_CURRENT] = curframe;
2601 if (avctx->codec->update_thread_context)
2602 ff_thread_finish_setup(avctx);
2604 s->linesize = curframe->tf.f->linesize[0];
2605 s->uvlinesize = curframe->tf.f->linesize[1];
2607 memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2608 /* Zero macroblock structures for top/top-left prediction
2609 * from outside the frame. */
2611 memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2612 (s->mb_width + 1) * sizeof(*s->macroblocks));
2613 if (!s->mb_layout && s->keyframe)
2614 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2616 memset(s->ref_count, 0, sizeof(s->ref_count));
2618 if (s->mb_layout == 1) {
2619 // Make sure the previous frame has read its segmentation map,
2620 // if we re-use the same map.
2621 if (prev_frame && s->segmentation.enabled &&
2622 !s->segmentation.update_map)
2623 ff_thread_await_progress(&prev_frame->tf, 1, 0);
2625 vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2627 vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2630 if (avctx->active_thread_type == FF_THREAD_FRAME)
2633 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2634 s->num_jobs = num_jobs;
2635 s->curframe = curframe;
2636 s->prev_frame = prev_frame;
2637 s->mv_min.y = -MARGIN;
2638 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2639 for (i = 0; i < MAX_THREADS; i++) {
2640 s->thread_data[i].thread_mb_pos = 0;
2641 s->thread_data[i].wait_mb_pos = INT_MAX;
2644 avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2647 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
2650 ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
2651 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2654 // if future frames don't use the updated probabilities,
2655 // reset them to the values we saved
2656 if (!s->update_probabilities)
2657 s->prob[0] = s->prob[1];
2659 if (!s->invisible) {
2660 if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
2667 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2671 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2674 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
2677 #if CONFIG_VP7_DECODER
2678 static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2681 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
2683 #endif /* CONFIG_VP7_DECODER */
2685 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2687 VP8Context *s = avctx->priv_data;
2690 vp8_decode_flush_impl(avctx, 1);
2691 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2692 av_frame_free(&s->frames[i].tf.f);
2697 static av_cold int vp8_init_frames(VP8Context *s)
2700 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2701 s->frames[i].tf.f = av_frame_alloc();
2702 if (!s->frames[i].tf.f)
2703 return AVERROR(ENOMEM);
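/* Common decoder init: select the YUV 4:2:0 output format, set up the
 * shared video and VP7/8 DSP contexts, wire the VP7- or VP8-specific
 * prediction, DSP and row decode/filter functions, then allocate the frame
 * structures. */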
2708 static av_always_inline
2709 int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2711 VP8Context *s = avctx->priv_data;
2715 s->vp7 = avctx->codec->id == AV_CODEC_ID_VP7;
2716 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2717 avctx->internal->allocate_progress = 1;
2719 ff_videodsp_init(&s->vdsp, 8);
2721 ff_vp78dsp_init(&s->vp8dsp);
2722 if (CONFIG_VP7_DECODER && is_vp7) {
2723 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2724 ff_vp7dsp_init(&s->vp8dsp);
2725 s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
2726 s->filter_mb_row = vp7_filter_mb_row;
2727 } else if (CONFIG_VP8_DECODER && !is_vp7) {
2728 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2729 ff_vp8dsp_init(&s->vp8dsp);
2730 s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
2731 s->filter_mb_row = vp8_filter_mb_row;
2734 /* does not change for VP8 */
2735 memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
2737 if ((ret = vp8_init_frames(s)) < 0) {
2738 ff_vp8_decode_free(avctx);
2745 #if CONFIG_VP7_DECODER
2746 static int vp7_decode_init(AVCodecContext *avctx)
2748 return vp78_decode_init(avctx, IS_VP7);
2750 #endif /* CONFIG_VP7_DECODER */
2752 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2754 return vp78_decode_init(avctx, IS_VP8);
2757 #if CONFIG_VP8_DECODER
2758 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2760 VP8Context *s = avctx->priv_data;
2765 if ((ret = vp8_init_frames(s)) < 0) {
2766 ff_vp8_decode_free(avctx);
2773 #define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
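/* REBASE translates a frame pointer from the source thread's frames[] array
 * into the equivalent entry of the destination context.  The update below
 * copies the coding state a future frame depends on (probabilities,
 * segmentation, loop-filter deltas, sign biases) and re-references the
 * source's frame buffers. */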
2775 static int vp8_decode_update_thread_context(AVCodecContext *dst,
2776 const AVCodecContext *src)
2778 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2781 if (s->macroblocks_base &&
2782 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2784 s->mb_width = s_src->mb_width;
2785 s->mb_height = s_src->mb_height;
2788 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2789 s->segmentation = s_src->segmentation;
2790 s->lf_delta = s_src->lf_delta;
2791 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2793 for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2794 if (s_src->frames[i].tf.f->data[0]) {
2795 int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
2801 s->framep[0] = REBASE(s_src->next_framep[0]);
2802 s->framep[1] = REBASE(s_src->next_framep[1]);
2803 s->framep[2] = REBASE(s_src->next_framep[2]);
2804 s->framep[3] = REBASE(s_src->next_framep[3]);
2808 #endif /* CONFIG_VP8_DECODER */
2810 #if CONFIG_VP7_DECODER
2811 AVCodec ff_vp7_decoder = {
2813 .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"),
2814 .type = AVMEDIA_TYPE_VIDEO,
2815 .id = AV_CODEC_ID_VP7,
2816 .priv_data_size = sizeof(VP8Context),
2817 .init = vp7_decode_init,
2818 .close = ff_vp8_decode_free,
2819 .decode = vp7_decode_frame,
2820 .capabilities = CODEC_CAP_DR1,
2821 .flush = vp8_decode_flush,
2823 #endif /* CONFIG_VP7_DECODER */
2825 #if CONFIG_VP8_DECODER
2826 AVCodec ff_vp8_decoder = {
2828 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2829 .type = AVMEDIA_TYPE_VIDEO,
2830 .id = AV_CODEC_ID_VP8,
2831 .priv_data_size = sizeof(VP8Context),
2832 .init = ff_vp8_decode_init,
2833 .close = ff_vp8_decode_free,
2834 .decode = ff_vp8_decode_frame,
2835 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2836 .flush = vp8_decode_flush,
2837 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2838 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2840 #endif /* CONFIG_VP8_DECODER */