2 * VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Jason Garrett-Glaser
8 * This file is part of FFmpeg.
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "libavutil/imgutils.h"
29 #include "rectangle.h"
/* Release every per-frame-dimension scratch allocation owned by the context.
 * av_freep() frees and NULLs each pointer, so calling this twice is safe.
 * NOTE(review): this excerpt elides lines (embedded line numbers jump);
 * code is left byte-identical, comments only added. */
36 static void free_buffers(VP8Context *s)
38 av_freep(&s->macroblocks_base);
39 av_freep(&s->filter_strength);
40 av_freep(&s->intra4x4_pred_mode_top);
41 av_freep(&s->top_nnz);
42 av_freep(&s->edge_emu_buffer);
43 av_freep(&s->top_border);
/* macroblocks aliases into macroblocks_base (set in update_dimensions),
 * so it must be cleared along with the base allocation. */
45 s->macroblocks = NULL;
/* Obtain a frame buffer via the frame-threading API and attach a
 * segmentation map (s->mb_width * s->mb_height bytes) to f->ref_index[0].
 * A previously released map is recycled from s->segmentation_maps when one
 * is cached and the cache has not been invalidated (e.g. by a flush).
 * Returns 0 on success (elided here) or a negative AVERROR. */
48 static int vp8_alloc_frame(VP8Context *s, AVFrame *f)
51 if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0)
/* Prefer a cached map over a fresh zeroed allocation. */
53 if (!s->maps_are_invalid && s->num_maps_to_be_freed) {
54 f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed];
55 } else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) {
/* Map allocation failed: hand the buffer back before reporting OOM so the
 * frame is not leaked in the thread pool. */
56 ff_thread_release_buffer(s->avctx, f);
57 return AVERROR(ENOMEM);
/* Release a frame and dispose of its segmentation map. On the non-close
 * path the map is stashed in s->segmentation_maps for reuse by
 * vp8_alloc_frame(); on close (is_close, branch structure partly elided
 * here) it is freed outright. */
62 static void vp8_release_frame(VP8Context *s, AVFrame *f, int is_close)
65 if (f->ref_index[0]) {
/* The stash array is fixed-size; overflow would be a logic error. */
66 assert(s->num_maps_to_be_freed < FF_ARRAY_ELEMS(s->segmentation_maps));
67 s->segmentation_maps[s->num_maps_to_be_freed++] = f->ref_index[0];
/* Ownership moved to the stash — clear so the frame no longer points at it. */
68 f->ref_index[0] = NULL;
71 av_freep(&f->ref_index[0]);
73 ff_thread_release_buffer(s->avctx, f);
/* Flush decoder state: release all 5 internal frames and clear the framep
 * reference array. Frame-thread copies skip the release unless forced
 * (avctx->is_copy), since they do not own the buffers. */
76 static void vp8_decode_flush_impl(AVCodecContext *avctx, int force, int is_close)
78 VP8Context *s = avctx->priv_data;
81 if (!avctx->is_copy || force) {
82 for (i = 0; i < 5; i++)
83 if (s->frames[i].data[0])
84 vp8_release_frame(s, &s->frames[i], is_close);
86 memset(s->framep, 0, sizeof(s->framep));
/* Any stashed segmentation maps may no longer match the (possibly new)
 * dimensions — mark them unusable for vp8_alloc_frame(). */
89 s->maps_are_invalid = 1;
/* Public flush callback: non-forced, non-closing flush. */
92 static void vp8_decode_flush(AVCodecContext *avctx)
94 vp8_decode_flush_impl(avctx, 0, 0);
/* (Re)configure the decoder for a new frame size: validate, flush old
 * buffers, set codec dimensions, then allocate all per-width scratch
 * arrays. Returns 0 on success (elided) or a negative AVERROR. */
97 static int update_dimensions(VP8Context *s, int width, int height)
99 if (width != s->avctx->width ||
100 height != s->avctx->height) {
101 if (av_image_check_size(width, height, 0, s->avctx))
102 return AVERROR_INVALIDDATA;
/* Force-release frames of the old size before reallocating. */
104 vp8_decode_flush_impl(s->avctx, 1, 0);
106 avcodec_set_dimensions(s->avctx, width, height);
/* Macroblock counts round coded dimensions up to 16-pixel units. */
109 s->mb_width = (s->avctx->coded_width +15) / 16;
110 s->mb_height = (s->avctx->coded_height+15) / 16;
/* macroblocks_base holds one extra column of edge MBs; see the +1 offset
 * applied below so s->macroblocks[-1] is a valid left-edge entry. */
112 s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
113 s->filter_strength = av_mallocz(s->mb_width*sizeof(*s->filter_strength));
114 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
115 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
116 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
/* All-or-nothing check; partial allocations are reclaimed elsewhere
 * (free_buffers) since av_mallocz failures leave NULL pointers. */
118 if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
119 !s->top_nnz || !s->top_border)
120 return AVERROR(ENOMEM);
122 s->macroblocks = s->macroblocks_base + 1;
127 static void parse_segment_info(VP8Context *s)
129 VP56RangeCoder *c = &s->c;
132 s->segmentation.update_map = vp8_rac_get(c);
134 if (vp8_rac_get(c)) { // update segment feature data
135 s->segmentation.absolute_vals = vp8_rac_get(c);
137 for (i = 0; i < 4; i++)
138 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
140 for (i = 0; i < 4; i++)
141 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
143 if (s->segmentation.update_map)
144 for (i = 0; i < 3; i++)
145 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
/* Read per-reference-frame and per-prediction-mode loop-filter level
 * deltas (4 of each; mode range spans MODE_I4x4..VP8_MVMODE_SPLIT). */
148 static void update_lf_deltas(VP8Context *s)
150 VP56RangeCoder *c = &s->c;
153 for (i = 0; i < 4; i++)
154 s->lf_delta.ref[i] = vp8_rac_get_sint(c, 6);
156 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++)
157 s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6);
/* Initialize the 1/2/4/8 DCT coefficient partitions. The first
 * num_partitions-1 sizes are stored as 24-bit LE values at the start of
 * buf; the last partition takes whatever data remains. Returns nonzero
 * (error path partly elided) when a declared size exceeds the buffer. */
160 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
162 const uint8_t *sizes = buf;
165 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
/* Skip past the size table itself. */
167 buf += 3*(s->num_coeff_partitions-1);
168 buf_size -= 3*(s->num_coeff_partitions-1);
172 for (i = 0; i < s->num_coeff_partitions-1; i++) {
173 int size = AV_RL24(sizes + 3*i);
174 if (buf_size - size < 0)
177 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
/* Final partition: all remaining bytes. */
181 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
/* Read the quantizer header (base AC index plus five deltas) and build
 * the dequant multiplier tables for all four segments via the DC/AC
 * lookup tables. Indices are clipped to 7 bits before lookup. */
186 static void get_quants(VP8Context *s)
188 VP56RangeCoder *c = &s->c;
191 int yac_qi = vp8_rac_get_uint(c, 7);
192 int ydc_delta = vp8_rac_get_sint(c, 4);
193 int y2dc_delta = vp8_rac_get_sint(c, 4);
194 int y2ac_delta = vp8_rac_get_sint(c, 4);
195 int uvdc_delta = vp8_rac_get_sint(c, 4);
196 int uvac_delta = vp8_rac_get_sint(c, 4);
198 for (i = 0; i < 4; i++) {
/* With segmentation, each segment supplies its own base index, either
 * absolute or relative to yac_qi (relative branch elided here). */
199 if (s->segmentation.enabled) {
200 base_qi = s->segmentation.base_quant[i];
201 if (!s->segmentation.absolute_vals)
206 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
207 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)];
/* Y2 (second-order/WHT) DC is doubled, AC scaled by 155/100 per spec. */
208 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
209 s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100;
210 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
211 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
/* Spec-mandated floor/ceiling on two of the factors. */
213 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
214 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
219 * Determine which buffers golden and altref should be updated with after this frame.
220 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
222 * Intra frames update all 3 references
223 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
224 * If the update (golden|altref) flag is set, it's updated with the current frame
225 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
226 * If the flag is not set, the number read means:
 * 0: no update
228 * 1: VP56_FRAME_PREVIOUS
229 * 2: update golden with altref, or update altref with golden
/* Resolve which frame buffer a golden/altref update should source from;
 * see the comment block above for the full decision table. */
231 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
233 VP56RangeCoder *c = &s->c;
/* Update flag set: use the frame being decoded. */
236 return VP56_FRAME_CURRENT;
238 switch (vp8_rac_get_uint(c, 2)) {
240 return VP56_FRAME_PREVIOUS;
/* Cross-copy: golden takes altref, altref takes golden. */
242 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
244 return VP56_FRAME_NONE;
/* Read the golden/altref update flags and translate them into source
 * frames via ref_to_update(). */
247 static void update_refs(VP8Context *s)
249 VP56RangeCoder *c = &s->c;
251 int update_golden = vp8_rac_get(c);
252 int update_altref = vp8_rac_get(c);
254 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
255 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
/* Parse the complete VP8 frame header: uncompressed 3-byte (+7 on
 * keyframes) prefix, then the range-coded first partition covering
 * segmentation, loop filter, partitions, quantizers, reference updates
 * and all probability updates. Returns 0 on success (elided) or a
 * negative AVERROR on malformed input. */
258 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
260 VP56RangeCoder *c = &s->c;
261 int header_size, hscale, vscale, i, j, k, l, m, ret;
262 int width = s->avctx->width;
263 int height = s->avctx->height;
/* Uncompressed data chunk: frame type, profile, show_frame, and the
 * 19-bit first-partition size packed in the first 3 bytes. */
265 s->keyframe = !(buf[0] & 1);
266 s->profile = (buf[0]>>1) & 7;
267 s->invisible = !(buf[0] & 0x10);
268 header_size = AV_RL24(buf) >> 5;
273 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
/* Profile selects the subpel MC filter set: 0 = 6-tap, others bilinear. */
276 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
277 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
278 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
280 if (header_size > buf_size - 7*s->keyframe) {
281 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
282 return AVERROR_INVALIDDATA;
/* Keyframes carry a fixed start code followed by 14-bit dimensions and
 * 2-bit upscaling factors. */
286 if (AV_RL24(buf) != 0x2a019d) {
287 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
288 return AVERROR_INVALIDDATA;
290 width = AV_RL16(buf+3) & 0x3fff;
291 height = AV_RL16(buf+5) & 0x3fff;
292 hscale = buf[4] >> 6;
293 vscale = buf[6] >> 6;
297 if (hscale || vscale)
298 av_log_missing_feature(s->avctx, "Upscaling", 1);
/* Keyframe: reset all probabilities and state to spec defaults. */
300 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
301 for (i = 0; i < 4; i++)
302 for (j = 0; j < 16; j++)
303 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
304 sizeof(s->prob->token[i][j]));
305 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
306 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
307 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
308 memset(&s->segmentation, 0, sizeof(s->segmentation));
311 if (!s->macroblocks_base || /* first frame */
312 width != s->avctx->width || height != s->avctx->height) {
313 if ((ret = update_dimensions(s, width, height)) < 0)
/* Start range decoding the first (header) partition. */
317 ff_vp56_init_range_decoder(c, buf, header_size);
319 buf_size -= header_size;
323 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
324 vp8_rac_get(c); // whether we can skip clamping in dsp functions
327 if ((s->segmentation.enabled = vp8_rac_get(c)))
328 parse_segment_info(s);
330 s->segmentation.update_map = 0; // FIXME: move this to some init function?
332 s->filter.simple = vp8_rac_get(c);
333 s->filter.level = vp8_rac_get_uint(c, 6);
334 s->filter.sharpness = vp8_rac_get_uint(c, 3);
336 if ((s->lf_delta.enabled = vp8_rac_get(c)))
340 if (setup_partitions(s, buf, buf_size)) {
341 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
342 return AVERROR_INVALIDDATA;
/* Sign-bias flags control MV sign flipping across references. */
349 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
350 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
353 // if we aren't saving this frame's probabilities for future frames,
354 // make a copy of the current probabilities
355 if (!(s->update_probabilities = vp8_rac_get(c)))
356 s->prob[1] = s->prob[0];
358 s->update_last = s->keyframe || vp8_rac_get(c);
/* Token probability updates (spec 13.4): each of the 4*8*3*(tokens-1)
 * probs may be replaced; the replacement is fanned out to every coeff
 * position mapped to band j. */
360 for (i = 0; i < 4; i++)
361 for (j = 0; j < 8; j++)
362 for (k = 0; k < 3; k++)
363 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
364 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
365 int prob = vp8_rac_get_uint(c, 8);
366 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
367 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
370 if ((s->mbskip_enabled = vp8_rac_get(c)))
371 s->prob->mbskip = vp8_rac_get_uint(c, 8);
/* Inter-frame-only probabilities (intra/last/golden ref selection plus
 * optional 16x16 and chroma pred prob refreshes). */
374 s->prob->intra = vp8_rac_get_uint(c, 8);
375 s->prob->last = vp8_rac_get_uint(c, 8);
376 s->prob->golden = vp8_rac_get_uint(c, 8);
379 for (i = 0; i < 4; i++)
380 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
382 for (i = 0; i < 3; i++)
383 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
385 // 17.2 MV probability update
386 for (i = 0; i < 2; i++)
387 for (j = 0; j < 19; j++)
388 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
389 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
/* Clamp an MV into the per-row legal range cached in s->mv_min/mv_max. */
395 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
397 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
398 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
402 * Motion vector coding, 17.1.
/* Decode one signed MV component (spec 17.1). Short vectors use a small
 * tree (branch elided here); long vectors read 10 magnitude bits in the
 * interleaved order below, with bit 3 implied unless the low bits are
 * all set. p[1] is the sign probability. */
404 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
408 if (vp56_rac_get_prob_branchy(c, p[0])) {
/* Long vector: bits 0-2 ascending, then bits 9 down to 4. */
411 for (i = 0; i < 3; i++)
412 x += vp56_rac_get_prob(c, p[9 + i]) << i;
413 for (i = 9; i > 3; i--)
414 x += vp56_rac_get_prob(c, p[9 + i]) << i;
/* Bit 3 is only coded when some higher bit is set. */
415 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
419 const uint8_t *ps = p+2;
420 bit = vp56_rac_get_prob(c, *ps);
423 bit = vp56_rac_get_prob(c, *ps);
426 x += vp56_rac_get_prob(c, *ps);
/* Zero has no sign bit. */
429 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
/* Pick the sub-MV probability set from whether the left/top neighbour
 * sub-blocks carry the same (zero or equal) vector. */
432 static av_always_inline
433 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
436 return vp8_submv_prob[4-!!left];
438 return vp8_submv_prob[2];
439 return vp8_submv_prob[1-!!left];
443 * Split motion vector prediction, 16.4.
444 * @returns the number of motion vectors parsed (2, 4 or 16)
/* Split MV decoding (spec 16.4): choose a partitioning (16x8/8x16/8x8/4x4,
 * some branches elided), then for each partition pick left/above/zero/new
 * via the sub-MV tree. mb[2] is the above MB and mb[-1] the left MB in
 * the macroblock ring layout. @return number of vectors parsed. */
446 static av_always_inline
447 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
451 VP8Macroblock *top_mb = &mb[2];
452 VP8Macroblock *left_mb = &mb[-1];
453 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
454 *mbsplits_top = vp8_mbsplits[top_mb->partitioning],
455 *mbsplits_cur, *firstidx;
456 VP56mv *top_mv = top_mb->bmv;
457 VP56mv *left_mv = left_mb->bmv;
458 VP56mv *cur_mv = mb->bmv;
/* Partitioning mode tree. */
460 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
461 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
462 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
464 part_idx = VP8_SPLITMVMODE_8x8;
467 part_idx = VP8_SPLITMVMODE_4x4;
470 num = vp8_mbsplit_count[part_idx];
471 mbsplits_cur = vp8_mbsplits[part_idx],
472 firstidx = vp8_mbfirstidx[part_idx];
473 mb->partitioning = part_idx;
475 for (n = 0; n < num; n++) {
477 uint32_t left, above;
478 const uint8_t *submv_prob;
/* Left/above context comes from the neighbouring MB on the block edge,
 * otherwise from already-decoded sub-blocks of this MB. */
481 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
483 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
485 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
487 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
489 submv_prob = get_submv_prob(left, above);
491 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
492 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
493 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
/* NEW4x4: explicit MV delta on top of the MB-level MV. */
494 mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
495 mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
497 AV_ZERO32(&mb->bmv[n]);
500 AV_WN32A(&mb->bmv[n], above);
503 AV_WN32A(&mb->bmv[n], left);
/* Inter-MB motion vector decoding (spec 16.2/16.3): survey the top, left
 * and top-left neighbours for candidate MVs, weight them, then decode
 * the MB mode (ZERO / NEAREST / NEAR / NEW-MV / SPLIT) against those
 * counts. */
510 static av_always_inline
511 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
513 VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
515 mb + 1 /* top-left */ };
516 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
517 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
519 int cur_sign_bias = s->sign_bias[mb->ref_frame];
520 int8_t *sign_bias = s->sign_bias;
522 uint8_t cnt[4] = { 0 };
523 VP56RangeCoder *c = &s->c;
525 AV_ZERO32(&near_mv[0]);
526 AV_ZERO32(&near_mv[1]);
527 AV_ZERO32(&near_mv[2]);
529 /* Process MB on top, left and top-left */
530 #define MV_EDGE_CHECK(n)\
532 VP8Macroblock *edge = mb_edge[n];\
533 int edge_ref = edge->ref_frame;\
534 if (edge_ref != VP56_FRAME_CURRENT) {\
535 uint32_t mv = AV_RN32A(&edge->mv);\
537 if (cur_sign_bias != sign_bias[edge_ref]) {\
538 /* SWAR negate of the values in mv. */\
540 mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
542 if (!n || mv != AV_RN32A(&near_mv[idx]))\
543 AV_WN32A(&near_mv[++idx], mv);\
544 cnt[idx] += 1 + (n != 2);\
546 cnt[CNT_ZERO] += 1 + (n != 2);\
554 mb->partitioning = VP8_SPLITMVMODE_NONE;
555 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
556 mb->mode = VP8_MVMODE_MV;
558 /* If we have three distinct MVs, merge first and last if they're the same */
559 if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
560 cnt[CNT_NEAREST] += 1;
562 /* Swap near and nearest if necessary */
563 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
564 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
565 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
568 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
569 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
571 /* Choose the best mv out of 0,0 and the nearest mv */
572 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
/* Repurpose cnt[CNT_SPLITMV] as the split-mode context: weighted count
 * of neighbouring MBs that used SPLIT. */
573 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
574 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
575 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
577 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
578 mb->mode = VP8_MVMODE_SPLIT;
/* MB-level mv becomes the last sub-MV for prediction of later MBs. */
579 mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
581 mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
582 mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
586 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
590 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
594 mb->mode = VP8_MVMODE_ZERO;
/* Decode the 16 per-subblock 4x4 intra prediction modes. On keyframes
 * the mode tree is conditioned on the top/left neighbour modes (tracked
 * in intra4x4_pred_mode_top/left); on inter frames a single context-free
 * probability set is used. */
600 static av_always_inline
601 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
602 int mb_x, int keyframe)
604 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
607 uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x;
608 uint8_t* const left = s->intra4x4_pred_mode_left;
609 for (y = 0; y < 4; y++) {
610 for (x = 0; x < 4; x++) {
612 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
613 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
/* Decoded mode becomes the context for the next row/column. */
614 left[y] = top[x] = *intra4x4;
620 for (i = 0; i < 16; i++)
621 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
/* Decode the per-macroblock mode header: segment id (when the map is
 * being updated), skip flag, intra-vs-inter choice, prediction modes
 * and reference frame, plus MVs for inter MBs. Branch structure is
 * partly elided in this excerpt. */
625 static av_always_inline
626 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment, uint8_t *ref)
628 VP56RangeCoder *c = &s->c;
630 if (s->segmentation.update_map) {
/* 2-bit segment id decoded with the segmentid tree probabilities. */
631 int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
632 *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
/* Otherwise inherit the segment from the reference map when present. */
634 *segment = ref ? *ref : *segment;
635 s->segment = *segment;
637 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
/* Keyframe path: intra modes with the intra-specific tree. */
640 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
642 if (mb->mode == MODE_I4x4) {
643 decode_intra4x4_modes(s, c, mb_x, 1);
/* Non-I4x4: replicate the implied 4x4 mode into the context arrays. */
645 const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
646 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
647 AV_WN32A(s->intra4x4_pred_mode_left, modes);
650 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
651 mb->ref_frame = VP56_FRAME_CURRENT;
652 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
/* Inter MB: pick the reference frame (last/golden/altref). */
654 if (vp56_rac_get_prob_branchy(c, s->prob->last))
655 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
656 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
658 mb->ref_frame = VP56_FRAME_PREVIOUS;
659 s->ref_count[mb->ref_frame-1]++;
661 // motion vectors, 16.3
662 decode_mvs(s, mb, mb_x, mb_y);
/* Intra MB in an inter frame: frame-adaptive probabilities. */
665 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
667 if (mb->mode == MODE_I4x4)
668 decode_intra4x4_modes(s, c, mb_x, 0);
670 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
671 mb->ref_frame = VP56_FRAME_CURRENT;
672 mb->partitioning = VP8_SPLITMVMODE_NONE;
673 AV_ZERO32(&mb->bmv[0]);
677 #ifndef decode_block_coeffs_internal
679 * @param c arithmetic bitstream reader context
680 * @param block destination for block coefficients
681 * @param probs probabilities to use when reading trees from the bitstream
682 * @param i initial coeff index, 0 unless a separate DC block is coded
683 * @param qmul array holding the dc/ac dequant factor at position 0/1
684 * @return 0 if no coeffs were decoded
685 * otherwise, the index of the last coeff decoded plus one
/* Core DCT token decoding loop; see the doc comment above for the
 * parameter contract. Walks the token tree per coefficient, tracks the
 * context (0/1/2 = zero/one/large) via token_prob reselection, and
 * writes dequantized values in zigzag order. */
687 static int decode_block_coeffs_internal(VP56RangeCoder *c, DCTELEM block[16],
688 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
689 int i, uint8_t *token_prob, int16_t qmul[2])
694 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
698 if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
700 return i; // invalid input; blocks should end with EOB
/* Next token uses the "previous was zero" context. */
701 token_prob = probs[i][0];
705 if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
707 token_prob = probs[i+1][1];
709 if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
710 coeff = vp56_rac_get_prob_branchy(c, token_prob[4]);
712 coeff += vp56_rac_get_prob(c, token_prob[5]);
716 if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
717 if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
718 coeff = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
721 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
722 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
724 } else { // DCT_CAT3 and up
/* Two tree bits select cat 3-6; base magnitude is 3 + 8<<cat. */
725 int a = vp56_rac_get_prob(c, token_prob[8]);
726 int b = vp56_rac_get_prob(c, token_prob[9+a]);
727 int cat = (a<<1) + b;
728 coeff = 3 + (8<<cat);
729 coeff += vp8_rac_get_coeff(c, ff_vp8_dct_cat_prob[cat]);
732 token_prob = probs[i+1][2];
/* Sign bit, then dequant: qmul[0] for DC (i==0), qmul[1] for AC. */
734 block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
742 * @param c arithmetic bitstream reader context
743 * @param block destination for block coefficients
744 * @param probs probabilities to use when reading trees from the bitstream
745 * @param i initial coeff index, 0 unless a separate DC block is coded
746 * @param zero_nhood the initial prediction context for number of surrounding
747 * all-zero blocks (only left/top, so 0-2)
748 * @param qmul array holding the dc/ac dequant factor at position 0/1
749 * @return 0 if no coeffs were decoded
750 * otherwise, the index of the last coeff decoded plus one
/* Thin wrapper: handle the common immediate-EOB case inline (using the
 * zero-neighbourhood context), otherwise defer to the internal decoder.
 * See the doc comment above for the parameter contract. */
752 static av_always_inline
753 int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
754 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
755 int i, int zero_nhood, int16_t qmul[2])
757 uint8_t *token_prob = probs[i][zero_nhood];
758 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
760 return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
/* Decode all residual coefficients for one macroblock: optional Y2 (WHT
 * DC) block, 16 luma 4x4 blocks, then 2x4 chroma blocks. t_nnz/l_nnz
 * carry the top/left nonzero-count contexts (index 8 = the Y2 block). */
763 static av_always_inline
764 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
765 uint8_t t_nnz[9], uint8_t l_nnz[9])
767 int i, x, y, luma_start = 0, luma_ctx = 3;
768 int nnz_pred, nnz, nnz_total = 0;
769 int segment = s->segment;
/* Modes other than I4x4/SPLIT code a separate second-order DC block. */
772 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
773 nnz_pred = t_nnz[8] + l_nnz[8];
775 // decode DC values and do hadamard
776 nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
777 s->qmat[segment].luma_dc_qmul);
778 l_nnz[8] = t_nnz[8] = !!nnz;
/* DC-only vs full inverse Walsh-Hadamard (selection elided here). */
783 s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
785 s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
792 for (y = 0; y < 4; y++)
793 for (x = 0; x < 4; x++) {
794 nnz_pred = l_nnz[y] + t_nnz[x];
795 nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
796 nnz_pred, s->qmat[segment].luma_qmul);
797 // nnz+block_dc may be one more than the actual last index, but we don't care
798 s->non_zero_count_cache[y][x] = nnz + block_dc;
799 t_nnz[x] = l_nnz[y] = !!nnz;
804 // TODO: what to do about dimensions? 2nd dim for luma is x,
805 // but for chroma it's (y<<1)|x
806 for (i = 4; i < 6; i++)
807 for (y = 0; y < 2; y++)
808 for (x = 0; x < 2; x++) {
809 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
810 nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
811 nnz_pred, s->qmat[segment].chroma_qmul);
812 s->non_zero_count_cache[i][(y<<1)+x] = nnz;
813 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
817 // if there were no coded coeffs despite the macroblock not being marked skip,
818 // we MUST not do the inner loop filter and should not do IDCT
819 // Since skip isn't used for bitstream prediction, just manually set it.
/* Save the bottom row of this MB (16 luma + 8+8 chroma pixels) into
 * top_border so the MB below can intra-predict after deblocking
 * overwrites the source rows. Chroma is skipped for the simple filter
 * (branch elided here). */
824 static av_always_inline
825 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
826 int linesize, int uvlinesize, int simple)
828 AV_COPY128(top_border, src_y + 15*linesize);
830 AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
831 AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
/* Swap (xchg=1) or copy (xchg=0) the saved top-border pixels with the
 * rows directly above the MB, so intra prediction sees pre-deblock
 * neighbours. top_border_m1 provides the top-left diagonal sample. */
835 static av_always_inline
836 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
837 int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
838 int simple, int xchg)
840 uint8_t *top_border_m1 = top_border-32; // for TL prediction
842 src_cb -= uvlinesize;
843 src_cr -= uvlinesize;
845 #define XCHG(a,b,xchg) do { \
846 if (xchg) AV_SWAP64(b,a); \
847 else AV_COPY64(b,a); \
/* Luma: left-of-MB, the MB's own 16 pixels, and (if present) the
 * top-right neighbour needed for 4x4 prediction. */
850 XCHG(top_border_m1+8, src_y-8, xchg);
851 XCHG(top_border, src_y, xchg);
852 XCHG(top_border+8, src_y+8, 1);
853 if (mb_x < mb_width-1)
854 XCHG(top_border+32, src_y+16, 1);
856 // only copy chroma for normal loop filter
857 // or to initialize the top row to 127
858 if (!simple || !mb_y) {
859 XCHG(top_border_m1+16, src_cb-8, xchg);
860 XCHG(top_border_m1+24, src_cr-8, xchg);
861 XCHG(top_border+16, src_cb, 1);
862 XCHG(top_border+24, src_cr, 1);
/* Downgrade 8x8/16x16 DC prediction at frame edges where neighbours are
 * missing (H.264-style TOP_DC/LEFT_DC/DC_128 fallbacks). */
866 static av_always_inline
867 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
870 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
872 return mb_y ? mode : LEFT_DC_PRED8x8;
/* TM (plane) prediction fallback at frame edges: vertical / horizontal /
 * DC_129 substitutes when left and/or top neighbours are missing. */
876 static av_always_inline
877 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
880 return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
882 return mb_y ? mode : HOR_PRED8x8;
/* Non-emulated-edge variant: only DC prediction needs edge fixups
 * (remaining modes returned unchanged in elided code). */
886 static av_always_inline
887 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
889 if (mode == DC_PRED8x8) {
890 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
/* CODEC_FLAG_EMU_EDGE variant: besides DC, vertical/horizontal modes need
 * DC_127/DC_129 fallbacks since no padded edge pixels exist, and TM is
 * remapped via check_tm_pred8x8_mode. */
896 static av_always_inline
897 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
901 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
903 return !mb_y ? DC_127_PRED8x8 : mode;
905 return !mb_x ? DC_129_PRED8x8 : mode;
906 case PLANE_PRED8x8 /*TM*/:
907 return check_tm_pred8x8_mode(mode, mb_x, mb_y);
/* 4x4 TM prediction fallback at frame edges, mirroring the 8x8 logic. */
912 static av_always_inline
913 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
916 return mb_y ? VERT_VP8_PRED : DC_129_PRED;
918 return mb_y ? mode : HOR_VP8_PRED;
/* Per-4x4-block edge fixups under CODEC_FLAG_EMU_EDGE. Modes needing
 * samples from missing neighbours either get DC_127/DC_129 substitutes
 * or set *copy_buf so the caller renders into a padded scratch buffer
 * (the copy path for DC/down-right/vert-right is elided here). */
922 static av_always_inline
923 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
932 case DIAG_DOWN_LEFT_PRED:
934 return !mb_y ? DC_127_PRED : mode;
942 return !mb_x ? DC_129_PRED : mode;
944 return check_tm_pred4x4_mode(mode, mb_x, mb_y);
945 case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
946 case DIAG_DOWN_RIGHT_PRED:
947 case VERT_RIGHT_PRED:
/* Perform intra prediction (and, for I4x4, per-block IDCT of residuals)
 * for one macroblock: 16x16 or 4x4 luma plus 8x8 chroma. The border is
 * exchanged before/after so prediction reads pre-deblock neighbours. */
956 static av_always_inline
957 void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
960 AVCodecContext *avctx = s->avctx;
964 // for the first row, we need to run xchg_mb_border to init the top edge to 127
965 // otherwise, skip it if we aren't going to deblock
966 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
967 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
968 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
969 s->filter.simple, 1);
/* 16x16 prediction path. */
971 if (mb->mode < MODE_I4x4) {
972 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
973 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
975 mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
977 s->hpc.pred16x16[mode](dst[0], s->linesize);
/* I4x4 path: 16 independent 4x4 predictions with per-block IDCT. */
979 uint8_t *ptr = dst[0];
980 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
981 uint8_t tr_top[4] = { 127, 127, 127, 127 };
983 // all blocks on the right edge of the macroblock use bottom edge
984 // the top macroblock for their topright edge
985 uint8_t *tr_right = ptr - s->linesize + 16;
987 // if we're on the right edge of the frame, said edge is extended
988 // from the top macroblock
989 if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
990 mb_x == s->mb_width-1) {
991 tr = tr_right[-1]*0x01010101u;
992 tr_right = (uint8_t *)&tr;
996 AV_ZERO128(s->non_zero_count_cache);
998 for (y = 0; y < 4; y++) {
999 uint8_t *topright = ptr + 4 - s->linesize;
1000 for (x = 0; x < 4; x++) {
1001 int copy = 0, linesize = s->linesize;
1002 uint8_t *dst = ptr+4*x;
/* Scratch: 5 rows x 8 bytes holding edge samples + 4x4 output when the
 * prediction must run in a padded copy buffer. */
1003 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
1005 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
1008 topright = tr_right;
1010 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
1011 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
/* Copy-buffer mode: stage the top/left edge samples, predict into the
 * buffer, then copy the 4x4 result back (restore below). */
1013 dst = copy_dst + 12;
1017 AV_WN32A(copy_dst+4, 127U * 0x01010101U);
1019 AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
1023 copy_dst[3] = ptr[4*x-s->linesize-1];
1030 copy_dst[35] = 129U;
1032 copy_dst[11] = ptr[4*x -1];
1033 copy_dst[19] = ptr[4*x+s->linesize -1];
1034 copy_dst[27] = ptr[4*x+s->linesize*2-1];
1035 copy_dst[35] = ptr[4*x+s->linesize*3-1];
1041 s->hpc.pred4x4[mode](dst, topright, linesize);
1043 AV_COPY32(ptr+4*x , copy_dst+12);
1044 AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
1045 AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
1046 AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
/* Add residual: DC-only fast path vs full IDCT. */
1049 nnz = s->non_zero_count_cache[y][x];
1052 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
1054 s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
1059 ptr += 4*s->linesize;
/* Chroma: one 8x8 prediction applied to both U and V planes. */
1064 if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1065 mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode, mb_x, mb_y);
1067 mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x, mb_y);
1069 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1070 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
/* Restore the exchanged border rows. */
1072 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
1073 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1074 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1075 s->filter.simple, 0);
/* Per-subpel-phase (mv & 7) edge-extension requirements used by the MC
 * routines below; row 0 also doubles as the mc_func index. */
1078 static const uint8_t subpel_idx[3][8] = {
1079 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1080 // also function pointer index
1081 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1082 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1088 * @param s VP8 decoding context
1089 * @param dst target buffer for block data at block position
1090 * @param ref reference picture buffer at origin (0, 0)
1091 * @param mv motion vector (relative to block position) to get pixel data from
1092 * @param x_off horizontal position of block from origin (0, 0)
1093 * @param y_off vertical position of block from origin (0, 0)
1094 * @param block_w width of block (16, 8 or 4)
1095 * @param block_h height of block (always same as block_w)
1096 * @param width width of src/dst plane data
1097 * @param height height of src/dst plane data
1098 * @param linesize size of a single line of plane data, including padding
1099 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
/* Luma motion compensation; see the doc comment above for parameters.
 * MVs are in quarter-pel units (>>2 integer part, remaining bits doubled
 * to the eighth-pel phase used by the DSP functions). Waits on the
 * reference frame's decode progress before reading, and falls back to
 * emulated_edge_mc when the filtered read would leave the plane. */
1101 static av_always_inline
1102 void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv,
1103 int x_off, int y_off, int block_w, int block_h,
1104 int width, int height, int linesize,
1105 vp8_mc_func mc_func[3][3])
1107 uint8_t *src = ref->data[0];
1111 int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1112 int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
1114 x_off += mv->x >> 2;
1115 y_off += mv->y >> 2;
/* Edge emulation: +3 rounds the required bottom row up to MB rows. */
1118 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1119 src += y_off * linesize + x_off;
1120 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1121 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1122 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
1123 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1124 x_off - mx_idx, y_off - my_idx, width, height);
1125 src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1127 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
/* Full-pel fast path: plain copy, no filtering. */
1129 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1130 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
1135 * chroma MC function
1137 * @param s VP8 decoding context
1138 * @param dst1 target buffer for block data at block position (U plane)
1139 * @param dst2 target buffer for block data at block position (V plane)
1140 * @param ref reference picture buffer at origin (0, 0)
1141 * @param mv motion vector (relative to block position) to get pixel data from
1142 * @param x_off horizontal position of block from origin (0, 0)
1143 * @param y_off vertical position of block from origin (0, 0)
1144 * @param block_w width of block (16, 8 or 4)
1145 * @param block_h height of block (always same as block_w)
1146 * @param width width of src/dst plane data
1147 * @param height height of src/dst plane data
1148 * @param linesize size of a single line of plane data, including padding
1149 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
/* Chroma motion compensation for both U and V planes; see the doc
 * comment above for parameters. Chroma MVs are in eighth-pel units
 * (>>3 integer part, low 3 bits are the filter phase). Mirrors
 * vp8_mc_luma including the emulated-edge fallback, applied to each
 * plane independently. */
1151 static av_always_inline
1152 void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref,
1153 const VP56mv *mv, int x_off, int y_off,
1154 int block_w, int block_h, int width, int height, int linesize,
1155 vp8_mc_func mc_func[3][3])
1157 uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
1160 int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1161 int my = mv->y&7, my_idx = subpel_idx[0][my];
1163 x_off += mv->x >> 3;
1164 y_off += mv->y >> 3;
1167 src1 += y_off * linesize + x_off;
1168 src2 += y_off * linesize + x_off;
1169 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1170 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1171 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
/* Out-of-plane read: emulate the edge per plane, then filter. */
1172 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
1173 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1174 x_off - mx_idx, y_off - my_idx, width, height);
1175 src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1176 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1178 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
1179 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1180 x_off - mx_idx, y_off - my_idx, width, height);
1181 src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1182 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1184 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1185 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
/* Full-pel fast path for both planes. */
1188 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1189 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1190 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
/**
 * Motion-compensate one partition (luma and both chroma planes) of a
 * macroblock. bx_off/by_off select the sub-block inside the MB; chroma
 * uses halved offsets/sizes and a copy of the luma MV (truncated to
 * full-pel for profile 3, which the mask below implements).
 */
static av_always_inline
void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
                 AVFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off,
                 int block_w, int block_h,
                 int width, int height, VP56mv *mv)
{
    VP56mv uvmv = *mv;

    /* Y */
    vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
                ref_frame, mv, x_off + bx_off, y_off + by_off,
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);

    /* U/V */
    if (s->profile == 3) {
        /* drop the fractional chroma MV bits */
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    x_off   >>= 1; y_off  >>= 1;
    bx_off  >>= 1; by_off >>= 1;
    width   >>= 1; height >>= 1;
    block_w >>= 1; block_h >>= 1;
    vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
}
1225 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1226 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref-1] > (mb_xy >> 5)) {
        /* Estimated source position ~4 MBs ahead, reusing this MB's MV. */
        int x_off = mb_x << 4, y_off = mb_y << 4;
        int mx = (mb->mv.x>>2) + x_off + 8;
        int my = (mb->mv.y>>2) + y_off;
        uint8_t **src= s->framep[ref]->data;
        int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
        /* chroma: halved coordinates, U and V interleaved via plane stride */
        off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
    }
}
1246 * Apply motion vectors to prediction buffer, chapter 18.
static av_always_inline
void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
                   int mb_x, int mb_y)
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16*s->mb_width, height = 16*s->mb_height;
    AVFrame *ref = s->framep[mb->ref_frame];
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        /* one MV for the whole 16x16 macroblock */
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
        break;
    case VP8_SPLITMVMODE_4x4: {
        int x, y;
        VP56mv uvmv;

        /* Y: one MV per 4x4 sub-block */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
                            ref, &bmv[4*y + x],
                            4*x + x_off, 4*y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
            }
        }

        /* U/V: average the four luma MVs covering each 4x4 chroma block */
        x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
                         mb->bmv[ 2*y    * 4 + 2*x+1].x +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].x +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].x;
                uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
                         mb->bmv[ 2*y    * 4 + 2*x+1].y +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].y +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].y;
                /* rounded, sign-correct divide by 4 (the shift term adds
                 * the sign bit so negative sums round toward zero) */
                uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
                uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
                if (s->profile == 3) {
                    /* full-pel chroma MVs only */
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
                vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
                              dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
                              4*x + x_off, 4*y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
            }
        }
        break;
    }
    case VP8_SPLITMVMODE_16x8:
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x16:
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x8:
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
        break;
    }
}
/**
 * Add the inverse transform of every coded block to the prediction in dst.
 * non_zero_count_cache packs four per-4x4-block coefficient counts into one
 * 32-bit word: a byte of 1 means DC-only (cheap dc_add), >1 means a full
 * IDCT is needed, 0 means the block is skipped.
 */
static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    /* luma (I4x4 adds its residual during per-block intra prediction) */
    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
            if (nnz4) {
                if (nnz4&~0x01010101) {
                    /* at least one block needs the full transform */
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break; /* no more coded blocks in this row */
                    }
                } else {
                    /* all four blocks DC-only: batched DC add */
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
                }
            }
            y_dst += 4*s->linesize;
        }
    }

    /* chroma: same scheme over the 2x2 blocks of each plane */
    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1+ch];
            if (nnz4&~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            goto chroma_idct_end;
                    }
                    ch_dst += 4*s->uvlinesize;
                }
            } else {
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
            }
        }
chroma_idct_end: ;
    }
}
/**
 * Compute the loop-filter parameters for one macroblock: base level from
 * segmentation (absolute or delta) plus per-reference/per-mode deltas,
 * and the interior (inner-edge) limit derived from the sharpness setting.
 */
static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[s->segment];
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level; /* segment value is a delta */
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];
        filter_level += s->lf_delta.mode[mb->mode];
    }

    /* clamp to the valid 0..63 range */
    filter_level = av_clip_uintp2(filter_level, 6);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit = interior_limit;
    /* inner edges are filtered unless the MB is skipped with a whole-MB mode */
    f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
}
/**
 * Normal (full) loop filter for one macroblock: macroblock edges use the
 * stronger mbedge limit, interior 4-pixel edges use the bedge limit, and
 * hev_thresh selects the high-edge-variance filter variant by level and
 * frame type.
 */
static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;
    int uvlinesize = s->uvlinesize;
    /* hev threshold by filter level; row [1] (keyframe) is slightly lower */
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };

    if (!filter_level)
        return;

    bedge_lim = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    if (mb_x) {
        /* left macroblock edge (skipped for the first MB of a row) */
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        /* vertical interior edges at x = 4, 8, 12 (chroma: x = 4) */
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }

    if (mb_y) {
        /* top macroblock edge (skipped for the first MB row) */
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        /* horizontal interior edges at y = 4, 8, 12 (chroma: y = 4) */
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }
}
/**
 * Simple loop filter: luma only, no high-edge-variance logic; MB edges use
 * the stronger mbedge limit, interior edges the bedge limit.
 */
static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;

    if (!filter_level)
        return;

    bedge_lim = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
    }
    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
    }
}
/* Run the full loop filter over one macroblock row, saving each MB's bottom
 * border first so the next row can still intra-predict from the unfiltered
 * pixels. */
static void filter_mb_row(VP8Context *s, AVFrame *curframe, int mb_y)
{
    VP8FilterStrength *f = s->filter_strength;
    uint8_t *dst[3] = {
        curframe->data[0] + 16*mb_y*s->linesize,
        curframe->data[1] +  8*mb_y*s->uvlinesize,
        curframe->data[2] +  8*mb_y*s->uvlinesize
    };
    int mb_x;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
        backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        filter_mb(s, dst, f++, mb_x, mb_y);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
    }
}
/* Same as filter_mb_row() but for the simple (luma-only) loop filter. */
static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y)
{
    VP8FilterStrength *f = s->filter_strength;
    uint8_t *dst = curframe->data[0] + 16*mb_y*s->linesize;
    int mb_x;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
        backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
        filter_mb_simple(s, dst, f++, mb_x, mb_y);
        dst += 16;
    }
}
1544 static void release_queued_segmaps(VP8Context *s, int is_close)
1546 int leave_behind = is_close ? 0 : !s->maps_are_invalid;
1547 while (s->num_maps_to_be_freed > leave_behind)
1548 av_freep(&s->segmentation_maps[--s->num_maps_to_be_freed]);
1549 s->maps_are_invalid = 0;
/**
 * Decode one VP8 packet into a picture.
 *
 * Parses the frame header, selects/updates the reference-frame set,
 * decodes all macroblock rows (mode/MV parsing, prediction, IDCT, loop
 * filter), and reports row progress for frame-threaded consumers.
 * Returns the number of consumed bytes or a negative error code.
 */
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
                            AVPacket *avpkt)
{
    VP8Context *s = avctx->priv_data;
    int ret, mb_x, mb_y, i, y, referenced;
    enum AVDiscard skip_thresh;
    AVFrame *av_uninit(curframe), *prev_frame = s->framep[VP56_FRAME_CURRENT];

    release_queued_segmaps(s, 0);

    if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
        return ret;

    /* the frame is referenced if it updates any of the reference slots */
    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
                                || s->update_altref == VP56_FRAME_CURRENT;

    skip_thresh = !referenced ? AVDISCARD_NONREF :
                    !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        /* caller asked to discard this frame class; keep references as-is */
        s->invisible = 1;
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
        goto skip_decode;
    }
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // release no longer referenced frames
    for (i = 0; i < 5; i++)
        if (s->frames[i].data[0] &&
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            vp8_release_frame(s, &s->frames[i], 0);

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
            break;
        }
    if (i == 5) {
        /* cannot happen: 5 frames, at most 4 of them referenced */
        av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (curframe->data[0])
        ff_thread_release_buffer(avctx, curframe);

    curframe->key_frame = s->keyframe;
    curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
    curframe->reference = referenced ? 3 : 0;
    if ((ret = vp8_alloc_frame(s, curframe))) {
        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
        return ret;
    }

    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[s->update_altref];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[VP56_FRAME_GOLDEN2];
    }
    if (s->update_golden != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[s->update_golden];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[VP56_FRAME_GOLDEN];
    }
    if (s->update_last) {
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    } else {
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
    }
    s->next_framep[VP56_FRAME_CURRENT]      = curframe;

    ff_thread_finish_setup(avctx);

    // Given that arithmetic probabilities are updated every frame, it's quite likely
    // that the values we have on a random interframe are complete junk if we didn't
    // start decode on a keyframe. So just don't display anything rather than junk.
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN] ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
        return AVERROR_INVALIDDATA;
    }

    s->linesize   = curframe->linesize[0];
    s->uvlinesize = curframe->linesize[1];

    if (!s->edge_emu_buffer)
        s->edge_emu_buffer = av_malloc(21*s->linesize);

    memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));

    /* Zero macroblock structures for top/top-left prediction from outside the frame. */
    memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));

    // top edge of 127 for intra prediction
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        s->top_border[0][15] = s->top_border[0][23] = 127;
        memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
    }
    memset(s->ref_count, 0, sizeof(s->ref_count));
    if (s->keyframe)
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);

#define MARGIN (16 << 2)
    /* MV clamping window in 1/4-pel units, shrunk as rows/cols advance */
    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;

    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
        /* macroblock rows are stored bottom-up, with one sentinel column */
        VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
        int mb_xy = mb_y*s->mb_width;
        uint8_t *dst[3] = {
            curframe->data[0] + 16*mb_y*s->linesize,
            curframe->data[1] +  8*mb_y*s->uvlinesize,
            curframe->data[2] +  8*mb_y*s->uvlinesize
        };

        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        memset(s->left_nnz, 0, sizeof(s->left_nnz));
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);

        // left edge of 129 for intra prediction
        if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
            for (i = 0; i < 3; i++)
                for (y = 0; y < 16>>!!i; y++)
                    dst[i][y*curframe->linesize[i]-1] = 129;
            if (mb_y == 1) // top left edge is also 129
                s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
        }

        s->mv_min.x = -MARGIN;
        s->mv_max.x = ((s->mb_width  - 1) << 6) + MARGIN;
        /* reuse of the previous frame's segmentation map requires that frame's
         * row to be decoded first when frame-threading */
        if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
            ff_thread_await_progress(prev_frame, mb_y, 0);

        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            /* Prefetch the current frame, 4 MBs ahead */
            s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
            s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);

            decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
                           prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL);

            prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

            if (!mb->skip)
                decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);

            if (mb->mode <= MODE_I4x4)
                intra_predict(s, dst, mb, mb_x, mb_y);
            else
                inter_predict(s, dst, mb, mb_x, mb_y);

            prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

            if (!mb->skip) {
                idct_mb(s, dst, mb);
            } else {
                AV_ZERO64(s->left_nnz);
                AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned

                // Reset DC block predictors if they would exist if the mb had coefficients
                if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                    s->left_nnz[8]      = 0;
                    s->top_nnz[mb_x][8] = 0;
                }
            }

            if (s->deblock_filter)
                filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);

            prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

            dst[0] += 16;
            dst[1] += 8;
            dst[2] += 8;
            s->mv_min.x -= 64;
            s->mv_max.x -= 64;
        }
        if (s->deblock_filter) {
            if (s->filter.simple)
                filter_mb_row_simple(s, curframe, mb_y);
            else
                filter_mb_row(s, curframe, mb_y);
        }
        s->mv_min.y -= 64;
        s->mv_max.y -= 64;

        ff_thread_report_progress(curframe, mb_y, 0);
    }

    ff_thread_report_progress(curframe, INT_MAX, 0);

    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

skip_decode:
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

    if (!s->invisible) {
        *(AVFrame*)data = *curframe;
        *data_size = sizeof(AVFrame);
    }

    return avpkt->size;
}
/* Initialize the decoder context: output format and DSP/prediction tables. */
static av_cold int vp8_decode_init(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;

    s->avctx = avctx;
    avctx->pix_fmt = PIX_FMT_YUV420P;

    dsputil_init(&s->dsp, avctx);
    /* VP8 reuses the H.264 intra prediction functions (8-bit, 1 chroma pel) */
    ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8, 1);
    ff_vp8dsp_init(&s->vp8dsp);

    return 0;
}
/* Release all frames, per-frame buffers and any queued segmentation maps. */
static av_cold int vp8_decode_free(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0, 1);
    release_queued_segmaps(avctx->priv_data, 1);
    return 0;
}
/* Per-thread copy init for frame threading: only rebind the context's
 * avctx pointer; everything else is filled by update_thread_context(). */
static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;

    s->avctx = avctx;

    return 0;
}
/* Map a frame pointer from the source thread context into the corresponding
 * slot of this context's frames[] array; NULL maps to NULL. Argument and
 * expansion are fully parenthesized so any expression is safe to pass. */
#define REBASE(pic) \
    ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
/* Frame-threading sync: copy the decoding state the next frame depends on
 * (probabilities, segmentation, filter deltas, reference frames) from the
 * source thread's context into this one. */
static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;

    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        /* dimensions changed: drop our per-size buffers; the queued
         * segmentation maps are for the old size and must not be reused */
        free_buffers(s);
        s->maps_are_invalid = 1;
    }

    /* pick the source's post-frame probabilities (saved copy if the frame
     * did not commit its updates) */
    s->prob[0] = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
    /* rebase the reference pointers onto our own frames[] array */
    s->framep[0] = REBASE(s_src->next_framep[0]);
    s->framep[1] = REBASE(s_src->next_framep[1]);
    s->framep[2] = REBASE(s_src->next_framep[2]);
    s->framep[3] = REBASE(s_src->next_framep[3]);

    return 0;
}
1821 AVCodec ff_vp8_decoder = {
1823 .type = AVMEDIA_TYPE_VIDEO,
1825 .priv_data_size = sizeof(VP8Context),
1826 .init = vp8_decode_init,
1827 .close = vp8_decode_free,
1828 .decode = vp8_decode_frame,
1829 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
1830 .flush = vp8_decode_flush,
1831 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
1832 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
1833 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),