2 * VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Jason Garrett-Glaser
8 * This file is part of FFmpeg.
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "libavutil/imgutils.h"
29 #include "rectangle.h"
36 static void free_buffers(VP8Context *s)
38 av_freep(&s->macroblocks_base);
39 av_freep(&s->filter_strength);
40 av_freep(&s->intra4x4_pred_mode_top);
41 av_freep(&s->top_nnz);
42 av_freep(&s->edge_emu_buffer);
43 av_freep(&s->top_border);
45 s->macroblocks = NULL;
48 static int vp8_alloc_frame(VP8Context *s, AVFrame *f)
51 if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0)
53 if (!s->maps_are_invalid && s->num_maps_to_be_freed) {
54 f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed];
55 } else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) {
56 ff_thread_release_buffer(s->avctx, f);
57 return AVERROR(ENOMEM);
62 static void vp8_release_frame(VP8Context *s, AVFrame *f, int is_close)
65 if (f->ref_index[0]) {
66 assert(s->num_maps_to_be_freed < FF_ARRAY_ELEMS(s->segmentation_maps));
67 s->segmentation_maps[s->num_maps_to_be_freed++] = f->ref_index[0];
68 f->ref_index[0] = NULL;
71 av_freep(&f->ref_index[0]);
73 ff_thread_release_buffer(s->avctx, f);
76 static void vp8_decode_flush_impl(AVCodecContext *avctx, int force, int is_close)
78 VP8Context *s = avctx->priv_data;
81 if (!avctx->is_copy || force) {
82 for (i = 0; i < 5; i++)
83 if (s->frames[i].data[0])
84 vp8_release_frame(s, &s->frames[i], is_close);
86 memset(s->framep, 0, sizeof(s->framep));
89 s->maps_are_invalid = 1;
92 static void vp8_decode_flush(AVCodecContext *avctx)
94 vp8_decode_flush_impl(avctx, 0, 0);
97 static int update_dimensions(VP8Context *s, int width, int height)
99 if (width != s->avctx->width ||
100 height != s->avctx->height) {
101 if (av_image_check_size(width, height, 0, s->avctx))
102 return AVERROR_INVALIDDATA;
104 vp8_decode_flush_impl(s->avctx, 1, 0);
106 avcodec_set_dimensions(s->avctx, width, height);
109 s->mb_width = (s->avctx->coded_width +15) / 16;
110 s->mb_height = (s->avctx->coded_height+15) / 16;
112 s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
113 s->filter_strength = av_mallocz(s->mb_width*sizeof(*s->filter_strength));
114 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
115 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
116 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
118 if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
119 !s->top_nnz || !s->top_border)
120 return AVERROR(ENOMEM);
122 s->macroblocks = s->macroblocks_base + 1;
127 static void parse_segment_info(VP8Context *s)
129 VP56RangeCoder *c = &s->c;
132 s->segmentation.update_map = vp8_rac_get(c);
134 if (vp8_rac_get(c)) { // update segment feature data
135 s->segmentation.absolute_vals = vp8_rac_get(c);
137 for (i = 0; i < 4; i++)
138 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
140 for (i = 0; i < 4; i++)
141 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
143 if (s->segmentation.update_map)
144 for (i = 0; i < 3; i++)
145 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
148 static void update_lf_deltas(VP8Context *s)
150 VP56RangeCoder *c = &s->c;
153 for (i = 0; i < 4; i++)
154 s->lf_delta.ref[i] = vp8_rac_get_sint(c, 6);
156 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++)
157 s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6);
160 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
162 const uint8_t *sizes = buf;
165 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
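    /* 1, 2, 4 or 8 DCT coefficient partitions; the sizes of all but the last
     * are stored as 3-byte little-endian values in front of the partition
     * data, and the last partition takes whatever buffer remains. */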
167 buf += 3*(s->num_coeff_partitions-1);
168 buf_size -= 3*(s->num_coeff_partitions-1);
172 for (i = 0; i < s->num_coeff_partitions-1; i++) {
173 int size = AV_RL24(sizes + 3*i);
174 if (buf_size - size < 0)
177 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
181 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
186 static void get_quants(VP8Context *s)
188 VP56RangeCoder *c = &s->c;
191 int yac_qi = vp8_rac_get_uint(c, 7);
192 int ydc_delta = vp8_rac_get_sint(c, 4);
193 int y2dc_delta = vp8_rac_get_sint(c, 4);
194 int y2ac_delta = vp8_rac_get_sint(c, 4);
195 int uvdc_delta = vp8_rac_get_sint(c, 4);
196 int uvac_delta = vp8_rac_get_sint(c, 4);
198 for (i = 0; i < 4; i++) {
199 if (s->segmentation.enabled) {
200 base_qi = s->segmentation.base_quant[i];
201 if (!s->segmentation.absolute_vals)
206 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
207 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)];
208 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
209 s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100;
210 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
211 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
213 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
214 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
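        /* The second-order (Y2/WHT) multipliers are scaled by 2 (DC) and
         * 155/100 (AC) with a floor of 8, and the chroma DC multiplier is
         * capped at 132, matching the quantizer fixups in the VP8 spec. */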
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does.
 *
 * Intra frames update all 3 references.
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set.
 * If the update (golden|altref) flag is set, it's updated with the current frame
 *     if update_last is set, and with VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 *     0: no update
 *     1: VP56_FRAME_PREVIOUS
 *     2: update golden with altref, or update altref with golden
231 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
233 VP56RangeCoder *c = &s->c;
236 return VP56_FRAME_CURRENT;
238 switch (vp8_rac_get_uint(c, 2)) {
240 return VP56_FRAME_PREVIOUS;
242 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
244 return VP56_FRAME_NONE;
247 static void update_refs(VP8Context *s)
249 VP56RangeCoder *c = &s->c;
251 int update_golden = vp8_rac_get(c);
252 int update_altref = vp8_rac_get(c);
254 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
255 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
258 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
260 VP56RangeCoder *c = &s->c;
261 int header_size, hscale, vscale, i, j, k, l, m, ret;
262 int width = s->avctx->width;
263 int height = s->avctx->height;
265 s->keyframe = !(buf[0] & 1);
266 s->profile = (buf[0]>>1) & 7;
267 s->invisible = !(buf[0] & 0x10);
268 header_size = AV_RL24(buf) >> 5;
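    /* The 3-byte frame tag parsed above packs, from the low bit up: the
     * inverted keyframe flag, a 3-bit profile, the show_frame flag and the
     * 19-bit size of the first (header) partition. */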
273 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
276 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
    else // profiles 1-3 use bilinear; 4+ aren't defined, so fall back to bilinear as well
278 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
280 if (header_size > buf_size - 7*s->keyframe) {
281 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
282 return AVERROR_INVALIDDATA;
286 if (AV_RL24(buf) != 0x2a019d) {
287 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
288 return AVERROR_INVALIDDATA;
290 width = AV_RL16(buf+3) & 0x3fff;
291 height = AV_RL16(buf+5) & 0x3fff;
292 hscale = buf[4] >> 6;
293 vscale = buf[6] >> 6;
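        /* Keyframes store width and height as 14-bit values; the top two bits
         * of each 16-bit field are the horizontal/vertical upscaling codes,
         * which this decoder does not implement (see the warning below). */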
297 if (hscale || vscale)
298 av_log_missing_feature(s->avctx, "Upscaling", 1);
300 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
301 for (i = 0; i < 4; i++)
302 for (j = 0; j < 16; j++)
303 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
304 sizeof(s->prob->token[i][j]));
305 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
306 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
307 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
308 memset(&s->segmentation, 0, sizeof(s->segmentation));
311 if (!s->macroblocks_base || /* first frame */
312 width != s->avctx->width || height != s->avctx->height) {
313 if ((ret = update_dimensions(s, width, height)) < 0)
317 ff_vp56_init_range_decoder(c, buf, header_size);
319 buf_size -= header_size;
323 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
324 vp8_rac_get(c); // whether we can skip clamping in dsp functions
327 if ((s->segmentation.enabled = vp8_rac_get(c)))
328 parse_segment_info(s);
330 s->segmentation.update_map = 0; // FIXME: move this to some init function?
332 s->filter.simple = vp8_rac_get(c);
333 s->filter.level = vp8_rac_get_uint(c, 6);
334 s->filter.sharpness = vp8_rac_get_uint(c, 3);
336 if ((s->lf_delta.enabled = vp8_rac_get(c)))
340 if (setup_partitions(s, buf, buf_size)) {
341 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
342 return AVERROR_INVALIDDATA;
349 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
350 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
353 // if we aren't saving this frame's probabilities for future frames,
354 // make a copy of the current probabilities
355 if (!(s->update_probabilities = vp8_rac_get(c)))
356 s->prob[1] = s->prob[0];
358 s->update_last = s->keyframe || vp8_rac_get(c);
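    /* Token probability updates (spec section 13): FFmpeg stores one
     * probability set per coefficient position rather than per band (see the
     * keyframe defaults above), so an updated value is written to every
     * position belonging to the band via vp8_coeff_band_indexes. */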
360 for (i = 0; i < 4; i++)
361 for (j = 0; j < 8; j++)
362 for (k = 0; k < 3; k++)
363 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
364 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
365 int prob = vp8_rac_get_uint(c, 8);
366 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
367 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
370 if ((s->mbskip_enabled = vp8_rac_get(c)))
371 s->prob->mbskip = vp8_rac_get_uint(c, 8);
374 s->prob->intra = vp8_rac_get_uint(c, 8);
375 s->prob->last = vp8_rac_get_uint(c, 8);
376 s->prob->golden = vp8_rac_get_uint(c, 8);
379 for (i = 0; i < 4; i++)
380 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
382 for (i = 0; i < 3; i++)
383 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
385 // 17.2 MV probability update
386 for (i = 0; i < 2; i++)
387 for (j = 0; j < 19; j++)
388 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
389 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
395 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
397 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
398 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
402 * Motion vector coding, 17.1.
404 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
408 if (vp56_rac_get_prob_branchy(c, p[0])) {
411 for (i = 0; i < 3; i++)
412 x += vp56_rac_get_prob(c, p[9 + i]) << i;
413 for (i = 9; i > 3; i--)
414 x += vp56_rac_get_prob(c, p[9 + i]) << i;
415 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
419 const uint8_t *ps = p+2;
420 bit = vp56_rac_get_prob(c, *ps);
423 bit = vp56_rac_get_prob(c, *ps);
426 x += vp56_rac_get_prob(c, *ps);
429 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
432 static av_always_inline
433 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
436 return vp8_submv_prob[4-!!left];
438 return vp8_submv_prob[2];
439 return vp8_submv_prob[1-!!left];
443 * Split motion vector prediction, 16.4.
444 * @returns the number of motion vectors parsed (2, 4 or 16)
446 static av_always_inline
447 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
451 VP8Macroblock *top_mb = &mb[2];
452 VP8Macroblock *left_mb = &mb[-1];
453 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
454 *mbsplits_top = vp8_mbsplits[top_mb->partitioning],
455 *mbsplits_cur, *firstidx;
456 VP56mv *top_mv = top_mb->bmv;
457 VP56mv *left_mv = left_mb->bmv;
458 VP56mv *cur_mv = mb->bmv;
460 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
461 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
462 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
464 part_idx = VP8_SPLITMVMODE_8x8;
467 part_idx = VP8_SPLITMVMODE_4x4;
470 num = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx];
472 firstidx = vp8_mbfirstidx[part_idx];
473 mb->partitioning = part_idx;
475 for (n = 0; n < num; n++) {
477 uint32_t left, above;
478 const uint8_t *submv_prob;
481 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
483 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
485 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
487 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
489 submv_prob = get_submv_prob(left, above);
491 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
492 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
493 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
494 mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
495 mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
497 AV_ZERO32(&mb->bmv[n]);
500 AV_WN32A(&mb->bmv[n], above);
503 AV_WN32A(&mb->bmv[n], left);
510 static av_always_inline
511 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
513 VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
515 mb + 1 /* top-left */ };
516 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
517 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
519 int cur_sign_bias = s->sign_bias[mb->ref_frame];
520 int8_t *sign_bias = s->sign_bias;
522 uint8_t cnt[4] = { 0 };
523 VP56RangeCoder *c = &s->c;
525 AV_ZERO32(&near_mv[0]);
526 AV_ZERO32(&near_mv[1]);
527 AV_ZERO32(&near_mv[2]);
529 /* Process MB on top, left and top-left */
530 #define MV_EDGE_CHECK(n)\
532 VP8Macroblock *edge = mb_edge[n];\
533 int edge_ref = edge->ref_frame;\
534 if (edge_ref != VP56_FRAME_CURRENT) {\
535 uint32_t mv = AV_RN32A(&edge->mv);\
537 if (cur_sign_bias != sign_bias[edge_ref]) {\
538 /* SWAR negate of the values in mv. */\
540 mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
542 if (!n || mv != AV_RN32A(&near_mv[idx]))\
543 AV_WN32A(&near_mv[++idx], mv);\
544 cnt[idx] += 1 + (n != 2);\
546 cnt[CNT_ZERO] += 1 + (n != 2);\
554 mb->partitioning = VP8_SPLITMVMODE_NONE;
555 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
556 mb->mode = VP8_MVMODE_MV;
        /* If we got three near MVs (one from each edge), merge the first and last
         * entries (top and top-left) if they are equal */
559 if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
560 cnt[CNT_NEAREST] += 1;
562 /* Swap near and nearest if necessary */
563 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
564 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
565 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
568 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
569 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
571 /* Choose the best mv out of 0,0 and the nearest mv */
572 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
573 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
574 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
575 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
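            /* The split-MV context counts how many neighbours use SPLITMV,
             * with left and top weighted double compared to top-left. */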
577 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
578 mb->mode = VP8_MVMODE_SPLIT;
579 mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
581 mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
582 mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
586 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
590 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
594 mb->mode = VP8_MVMODE_ZERO;
600 static av_always_inline
601 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
602 int mb_x, int keyframe)
604 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
607 uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x;
608 uint8_t* const left = s->intra4x4_pred_mode_left;
609 for (y = 0; y < 4; y++) {
610 for (x = 0; x < 4; x++) {
612 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
613 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
614 left[y] = top[x] = *intra4x4;
620 for (i = 0; i < 16; i++)
621 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
625 static av_always_inline
626 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment, uint8_t *ref)
628 VP56RangeCoder *c = &s->c;
630 if (s->segmentation.update_map)
631 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
633 *segment = ref ? *ref : *segment;
634 s->segment = *segment;
636 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
639 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
641 if (mb->mode == MODE_I4x4) {
642 decode_intra4x4_modes(s, c, mb_x, 1);
644 const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
645 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
646 AV_WN32A(s->intra4x4_pred_mode_left, modes);
649 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
650 mb->ref_frame = VP56_FRAME_CURRENT;
651 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
653 if (vp56_rac_get_prob_branchy(c, s->prob->last))
654 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
655 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
657 mb->ref_frame = VP56_FRAME_PREVIOUS;
658 s->ref_count[mb->ref_frame-1]++;
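        /* ref_count[] (indexed by reference frame minus one) feeds the
         * prefetch heuristic in prefetch_motion(). */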
660 // motion vectors, 16.3
661 decode_mvs(s, mb, mb_x, mb_y);
664 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
666 if (mb->mode == MODE_I4x4)
667 decode_intra4x4_modes(s, c, mb_x, 0);
669 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
670 mb->ref_frame = VP56_FRAME_CURRENT;
671 mb->partitioning = VP8_SPLITMVMODE_NONE;
672 AV_ZERO32(&mb->bmv[0]);
676 #ifndef decode_block_coeffs_internal
678 * @param c arithmetic bitstream reader context
679 * @param block destination for block coefficients
680 * @param probs probabilities to use when reading trees from the bitstream
681 * @param i initial coeff index, 0 unless a separate DC block is coded
682 * @param qmul array holding the dc/ac dequant factor at position 0/1
683 * @return 0 if no coeffs were decoded
684 * otherwise, the index of the last coeff decoded plus one
686 static int decode_block_coeffs_internal(VP56RangeCoder *c, DCTELEM block[16],
687 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
688 int i, uint8_t *token_prob, int16_t qmul[2])
693 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
697 if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
699 return i; // invalid input; blocks should end with EOB
700 token_prob = probs[i][0];
704 if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
706 token_prob = probs[i+1][1];
708 if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
709 coeff = vp56_rac_get_prob_branchy(c, token_prob[4]);
711 coeff += vp56_rac_get_prob(c, token_prob[5]);
715 if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
716 if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
717 coeff = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
720 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
721 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
723 } else { // DCT_CAT3 and up
724 int a = vp56_rac_get_prob(c, token_prob[8]);
725 int b = vp56_rac_get_prob(c, token_prob[9+a]);
726 int cat = (a<<1) + b;
727 coeff = 3 + (8<<cat);
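                /* Base values for DCT_CAT3..DCT_CAT6 are 11, 19, 35 and 67;
                 * the remaining offset bits are read just below. */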
728 coeff += vp8_rac_get_coeff(c, ff_vp8_dct_cat_prob[cat]);
731 token_prob = probs[i+1][2];
733 block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
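        /* qmul[0] is the DC dequant factor (coefficient 0) and qmul[1] the AC
         * factor used for all other positions, hence the qmul[!!i] above. */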
741 * @param c arithmetic bitstream reader context
742 * @param block destination for block coefficients
743 * @param probs probabilities to use when reading trees from the bitstream
744 * @param i initial coeff index, 0 unless a separate DC block is coded
745 * @param zero_nhood the initial prediction context for number of surrounding
746 * all-zero blocks (only left/top, so 0-2)
747 * @param qmul array holding the dc/ac dequant factor at position 0/1
748 * @return 0 if no coeffs were decoded
749 * otherwise, the index of the last coeff decoded plus one
751 static av_always_inline
752 int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
753 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
754 int i, int zero_nhood, int16_t qmul[2])
756 uint8_t *token_prob = probs[i][zero_nhood];
757 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
759 return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
762 static av_always_inline
763 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
764 uint8_t t_nnz[9], uint8_t l_nnz[9])
766 int i, x, y, luma_start = 0, luma_ctx = 3;
767 int nnz_pred, nnz, nnz_total = 0;
768 int segment = s->segment;
771 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
772 nnz_pred = t_nnz[8] + l_nnz[8];
774 // decode DC values and do hadamard
775 nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
776 s->qmat[segment].luma_dc_qmul);
777 l_nnz[8] = t_nnz[8] = !!nnz;
782 s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
784 s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
791 for (y = 0; y < 4; y++)
792 for (x = 0; x < 4; x++) {
793 nnz_pred = l_nnz[y] + t_nnz[x];
794 nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
795 nnz_pred, s->qmat[segment].luma_qmul);
796 // nnz+block_dc may be one more than the actual last index, but we don't care
797 s->non_zero_count_cache[y][x] = nnz + block_dc;
798 t_nnz[x] = l_nnz[y] = !!nnz;
803 // TODO: what to do about dimensions? 2nd dim for luma is x,
804 // but for chroma it's (y<<1)|x
805 for (i = 4; i < 6; i++)
806 for (y = 0; y < 2; y++)
807 for (x = 0; x < 2; x++) {
808 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
809 nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
810 nnz_pred, s->qmat[segment].chroma_qmul);
811 s->non_zero_count_cache[i][(y<<1)+x] = nnz;
812 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
816 // if there were no coded coeffs despite the macroblock not being marked skip,
817 // we MUST not do the inner loop filter and should not do IDCT
818 // Since skip isn't used for bitstream prediction, just manually set it.
823 static av_always_inline
824 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
825 int linesize, int uvlinesize, int simple)
827 AV_COPY128(top_border, src_y + 15*linesize);
829 AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
830 AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
834 static av_always_inline
835 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
836 int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
837 int simple, int xchg)
839 uint8_t *top_border_m1 = top_border-32; // for TL prediction
841 src_cb -= uvlinesize;
842 src_cr -= uvlinesize;
844 #define XCHG(a,b,xchg) do { \
845 if (xchg) AV_SWAP64(b,a); \
846 else AV_COPY64(b,a); \
849 XCHG(top_border_m1+8, src_y-8, xchg);
850 XCHG(top_border, src_y, xchg);
851 XCHG(top_border+8, src_y+8, 1);
852 if (mb_x < mb_width-1)
853 XCHG(top_border+32, src_y+16, 1);
855 // only copy chroma for normal loop filter
856 // or to initialize the top row to 127
857 if (!simple || !mb_y) {
858 XCHG(top_border_m1+16, src_cb-8, xchg);
859 XCHG(top_border_m1+24, src_cr-8, xchg);
860 XCHG(top_border+16, src_cb, 1);
861 XCHG(top_border+24, src_cr, 1);
865 static av_always_inline
866 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
869 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
871 return mb_y ? mode : LEFT_DC_PRED8x8;
875 static av_always_inline
876 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
879 return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
881 return mb_y ? mode : HOR_PRED8x8;
885 static av_always_inline
886 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
888 if (mode == DC_PRED8x8) {
889 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
895 static av_always_inline
896 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
900 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
902 return !mb_y ? DC_127_PRED8x8 : mode;
904 return !mb_x ? DC_129_PRED8x8 : mode;
905 case PLANE_PRED8x8 /*TM*/:
906 return check_tm_pred8x8_mode(mode, mb_x, mb_y);
911 static av_always_inline
912 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
915 return mb_y ? VERT_VP8_PRED : DC_129_PRED;
917 return mb_y ? mode : HOR_VP8_PRED;
921 static av_always_inline
922 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
931 case DIAG_DOWN_LEFT_PRED:
933 return !mb_y ? DC_127_PRED : mode;
941 return !mb_x ? DC_129_PRED : mode;
943 return check_tm_pred4x4_mode(mode, mb_x, mb_y);
944 case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
945 case DIAG_DOWN_RIGHT_PRED:
946 case VERT_RIGHT_PRED:
955 static av_always_inline
956 void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
959 AVCodecContext *avctx = s->avctx;
963 // for the first row, we need to run xchg_mb_border to init the top edge to 127
964 // otherwise, skip it if we aren't going to deblock
965 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
966 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
967 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
968 s->filter.simple, 1);
970 if (mb->mode < MODE_I4x4) {
971 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
972 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
974 mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
976 s->hpc.pred16x16[mode](dst[0], s->linesize);
978 uint8_t *ptr = dst[0];
979 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
980 uint8_t tr_top[4] = { 127, 127, 127, 127 };
        // all blocks on the right edge of the macroblock use the bottom edge of
        // the top macroblock for their topright edge
984 uint8_t *tr_right = ptr - s->linesize + 16;
986 // if we're on the right edge of the frame, said edge is extended
987 // from the top macroblock
988 if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
989 mb_x == s->mb_width-1) {
990 tr = tr_right[-1]*0x01010101u;
991 tr_right = (uint8_t *)&tr;
995 AV_ZERO128(s->non_zero_count_cache);
997 for (y = 0; y < 4; y++) {
998 uint8_t *topright = ptr + 4 - s->linesize;
999 for (x = 0; x < 4; x++) {
1000 int copy = 0, linesize = s->linesize;
1001 uint8_t *dst = ptr+4*x;
1002 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
1004 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
1007 topright = tr_right;
1009 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
1012 dst = copy_dst + 12;
1016 AV_WN32A(copy_dst+4, 127U * 0x01010101U);
1018 AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
1022 copy_dst[3] = ptr[4*x-s->linesize-1];
1029 copy_dst[35] = 129U;
1031 copy_dst[11] = ptr[4*x -1];
1032 copy_dst[19] = ptr[4*x+s->linesize -1];
1033 copy_dst[27] = ptr[4*x+s->linesize*2-1];
1034 copy_dst[35] = ptr[4*x+s->linesize*3-1];
1040 s->hpc.pred4x4[mode](dst, topright, linesize);
1042 AV_COPY32(ptr+4*x , copy_dst+12);
1043 AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
1044 AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
1045 AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
1048 nnz = s->non_zero_count_cache[y][x];
1051 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
1053 s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
1058 ptr += 4*s->linesize;
1063 if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1064 mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode, mb_x, mb_y);
1066 mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x, mb_y);
1068 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1069 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1071 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
1072 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1073 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1074 s->filter.simple, 0);
1077 static const uint8_t subpel_idx[3][8] = {
1078 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1079 // also function pointer index
1080 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1081 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
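/* For a fractional MV position mx in 0..7, subpel_idx[0][mx] is the number of
 * extra pixels needed left/above the block (and doubles as the MC function
 * index), subpel_idx[1][mx] the total number of extra pixels the filter reads,
 * and subpel_idx[2][mx] the extra pixels needed right/below; the MC functions
 * below use these to decide when edge emulation is required. */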
1087 * @param s VP8 decoding context
1088 * @param dst target buffer for block data at block position
1089 * @param ref reference picture buffer at origin (0, 0)
1090 * @param mv motion vector (relative to block position) to get pixel data from
1091 * @param x_off horizontal position of block from origin (0, 0)
1092 * @param y_off vertical position of block from origin (0, 0)
1093 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (16, 8 or 4)
1095 * @param width width of src/dst plane data
1096 * @param height height of src/dst plane data
1097 * @param linesize size of a single line of plane data, including padding
1098 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1100 static av_always_inline
1101 void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv,
1102 int x_off, int y_off, int block_w, int block_h,
1103 int width, int height, int linesize,
1104 vp8_mc_func mc_func[3][3])
1106 uint8_t *src = ref->data[0];
1110 int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1111 int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
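    /* Luma MVs are in quarter-pel units; the <<1 above moves them onto the
     * eighth-pel grid so mx/my index the same 8 subpel positions as chroma. */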
1113 x_off += mv->x >> 2;
1114 y_off += mv->y >> 2;
1117 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1118 src += y_off * linesize + x_off;
1119 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1120 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1121 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
1122 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1123 x_off - mx_idx, y_off - my_idx, width, height);
1124 src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1126 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
1128 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1129 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
1134 * chroma MC function
1136 * @param s VP8 decoding context
1137 * @param dst1 target buffer for block data at block position (U plane)
1138 * @param dst2 target buffer for block data at block position (V plane)
1139 * @param ref reference picture buffer at origin (0, 0)
1140 * @param mv motion vector (relative to block position) to get pixel data from
1141 * @param x_off horizontal position of block from origin (0, 0)
1142 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (8 or 4)
 * @param block_h height of block (8 or 4)
1145 * @param width width of src/dst plane data
1146 * @param height height of src/dst plane data
1147 * @param linesize size of a single line of plane data, including padding
1148 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1150 static av_always_inline
1151 void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref,
1152 const VP56mv *mv, int x_off, int y_off,
1153 int block_w, int block_h, int width, int height, int linesize,
1154 vp8_mc_func mc_func[3][3])
1156 uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
1159 int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1160 int my = mv->y&7, my_idx = subpel_idx[0][my];
1162 x_off += mv->x >> 3;
1163 y_off += mv->y >> 3;
1166 src1 += y_off * linesize + x_off;
1167 src2 += y_off * linesize + x_off;
1168 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1169 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1170 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1171 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
1172 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1173 x_off - mx_idx, y_off - my_idx, width, height);
1174 src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1175 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1177 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
1178 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1179 x_off - mx_idx, y_off - my_idx, width, height);
1180 src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1181 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1183 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1184 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1187 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1188 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1189 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1193 static av_always_inline
1194 void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
1195 AVFrame *ref_frame, int x_off, int y_off,
1196 int bx_off, int by_off,
1197 int block_w, int block_h,
1198 int width, int height, VP56mv *mv)
1203 vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
1204 ref_frame, mv, x_off + bx_off, y_off + by_off,
1205 block_w, block_h, width, height, s->linesize,
1206 s->put_pixels_tab[block_w == 8]);
1209 if (s->profile == 3) {
1213 x_off >>= 1; y_off >>= 1;
1214 bx_off >>= 1; by_off >>= 1;
1215 width >>= 1; height >>= 1;
1216 block_w >>= 1; block_h >>= 1;
1217 vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
1218 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1219 &uvmv, x_off + bx_off, y_off + by_off,
1220 block_w, block_h, width, height, s->uvlinesize,
1221 s->put_pixels_tab[1 + (block_w == 4)]);
1224 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1225 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1226 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1228 /* Don't prefetch refs that haven't been used very often this frame. */
1229 if (s->ref_count[ref-1] > (mb_xy >> 5)) {
1230 int x_off = mb_x << 4, y_off = mb_y << 4;
1231 int mx = (mb->mv.x>>2) + x_off + 8;
1232 int my = (mb->mv.y>>2) + y_off;
1233 uint8_t **src= s->framep[ref]->data;
1234 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1235 /* For threading, a ff_thread_await_progress here might be useful, but
1236 * it actually slows down the decoder. Since a bad prefetch doesn't
1237 * generate bad decoder output, we don't run it here. */
1238 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1239 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1240 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1245 * Apply motion vectors to prediction buffer, chapter 18.
1247 static av_always_inline
1248 void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
1251 int x_off = mb_x << 4, y_off = mb_y << 4;
1252 int width = 16*s->mb_width, height = 16*s->mb_height;
1253 AVFrame *ref = s->framep[mb->ref_frame];
1254 VP56mv *bmv = mb->bmv;
1256 switch (mb->partitioning) {
1257 case VP8_SPLITMVMODE_NONE:
1258 vp8_mc_part(s, dst, ref, x_off, y_off,
1259 0, 0, 16, 16, width, height, &mb->mv);
1261 case VP8_SPLITMVMODE_4x4: {
1266 for (y = 0; y < 4; y++) {
1267 for (x = 0; x < 4; x++) {
1268 vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
1270 4*x + x_off, 4*y + y_off, 4, 4,
1271 width, height, s->linesize,
1272 s->put_pixels_tab[2]);
1277 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1278 for (y = 0; y < 2; y++) {
1279 for (x = 0; x < 2; x++) {
1280 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1281 mb->bmv[ 2*y * 4 + 2*x+1].x +
1282 mb->bmv[(2*y+1) * 4 + 2*x ].x +
1283 mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1284 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1285 mb->bmv[ 2*y * 4 + 2*x+1].y +
1286 mb->bmv[(2*y+1) * 4 + 2*x ].y +
1287 mb->bmv[(2*y+1) * 4 + 2*x+1].y;
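                /* uvmv is the sum of the four luma MVs of this 2x2 group; the
                 * two lines below divide by 4, rounding halves away from
                 * zero, giving the chroma MV in eighth-pel units. */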
1288 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1289 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
1290 if (s->profile == 3) {
1294 vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
1295 dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1296 4*x + x_off, 4*y + y_off, 4, 4,
1297 width, height, s->uvlinesize,
1298 s->put_pixels_tab[2]);
1303 case VP8_SPLITMVMODE_16x8:
1304 vp8_mc_part(s, dst, ref, x_off, y_off,
1305 0, 0, 16, 8, width, height, &bmv[0]);
1306 vp8_mc_part(s, dst, ref, x_off, y_off,
1307 0, 8, 16, 8, width, height, &bmv[1]);
1309 case VP8_SPLITMVMODE_8x16:
1310 vp8_mc_part(s, dst, ref, x_off, y_off,
1311 0, 0, 8, 16, width, height, &bmv[0]);
1312 vp8_mc_part(s, dst, ref, x_off, y_off,
1313 8, 0, 8, 16, width, height, &bmv[1]);
1315 case VP8_SPLITMVMODE_8x8:
1316 vp8_mc_part(s, dst, ref, x_off, y_off,
1317 0, 0, 8, 8, width, height, &bmv[0]);
1318 vp8_mc_part(s, dst, ref, x_off, y_off,
1319 8, 0, 8, 8, width, height, &bmv[1]);
1320 vp8_mc_part(s, dst, ref, x_off, y_off,
1321 0, 8, 8, 8, width, height, &bmv[2]);
1322 vp8_mc_part(s, dst, ref, x_off, y_off,
1323 8, 8, 8, 8, width, height, &bmv[3]);
1328 static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
1332 if (mb->mode != MODE_I4x4) {
1333 uint8_t *y_dst = dst[0];
1334 for (y = 0; y < 4; y++) {
1335 uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
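            /* Each byte of nnz4 is the coefficient count of one of the four
             * 4x4 blocks in this row: if any block has AC coefficients
             * (count > 1) the blocks are transformed individually, otherwise
             * a DC-only fast path handles the whole row at once. */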
1337 if (nnz4&~0x01010101) {
1338 for (x = 0; x < 4; x++) {
1339 if ((uint8_t)nnz4 == 1)
1340 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
1341 else if((uint8_t)nnz4 > 1)
1342 s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
1348 s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
1351 y_dst += 4*s->linesize;
1355 for (ch = 0; ch < 2; ch++) {
1356 uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
1358 uint8_t *ch_dst = dst[1+ch];
1359 if (nnz4&~0x01010101) {
1360 for (y = 0; y < 2; y++) {
1361 for (x = 0; x < 2; x++) {
1362 if ((uint8_t)nnz4 == 1)
1363 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
1364 else if((uint8_t)nnz4 > 1)
1365 s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
1368 goto chroma_idct_end;
1370 ch_dst += 4*s->uvlinesize;
1373 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
1380 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
1382 int interior_limit, filter_level;
1384 if (s->segmentation.enabled) {
1385 filter_level = s->segmentation.filter_level[s->segment];
1386 if (!s->segmentation.absolute_vals)
1387 filter_level += s->filter.level;
1389 filter_level = s->filter.level;
1391 if (s->lf_delta.enabled) {
1392 filter_level += s->lf_delta.ref[mb->ref_frame];
1393 filter_level += s->lf_delta.mode[mb->mode];
1396 filter_level = av_clip_uintp2(filter_level, 6);
1398 interior_limit = filter_level;
1399 if (s->filter.sharpness) {
1400 interior_limit >>= (s->filter.sharpness + 3) >> 2;
1401 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1403 interior_limit = FFMAX(interior_limit, 1);
1405 f->filter_level = filter_level;
1406 f->inner_limit = interior_limit;
1407 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
1410 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
1412 int mbedge_lim, bedge_lim, hev_thresh;
1413 int filter_level = f->filter_level;
1414 int inner_limit = f->inner_limit;
1415 int inner_filter = f->inner_filter;
1416 int linesize = s->linesize;
1417 int uvlinesize = s->uvlinesize;
1418 static const uint8_t hev_thresh_lut[2][64] = {
1419 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1420 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1421 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1423 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1424 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1425 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1432 bedge_lim = 2*filter_level + inner_limit;
1433 mbedge_lim = bedge_lim + 4;
1435 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
1438 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
1439 mbedge_lim, inner_limit, hev_thresh);
1440 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
1441 mbedge_lim, inner_limit, hev_thresh);
1445 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
1446 inner_limit, hev_thresh);
1447 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
1448 inner_limit, hev_thresh);
1449 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
1450 inner_limit, hev_thresh);
1451 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
1452 uvlinesize, bedge_lim,
1453 inner_limit, hev_thresh);
1457 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
1458 mbedge_lim, inner_limit, hev_thresh);
1459 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
1460 mbedge_lim, inner_limit, hev_thresh);
1464 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
1465 linesize, bedge_lim,
1466 inner_limit, hev_thresh);
1467 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
1468 linesize, bedge_lim,
1469 inner_limit, hev_thresh);
1470 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
1471 linesize, bedge_lim,
1472 inner_limit, hev_thresh);
1473 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
1474 dst[2] + 4 * uvlinesize,
1475 uvlinesize, bedge_lim,
1476 inner_limit, hev_thresh);
1480 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
1482 int mbedge_lim, bedge_lim;
1483 int filter_level = f->filter_level;
1484 int inner_limit = f->inner_limit;
1485 int inner_filter = f->inner_filter;
1486 int linesize = s->linesize;
1491 bedge_lim = 2*filter_level + inner_limit;
1492 mbedge_lim = bedge_lim + 4;
1495 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
1497 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1498 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1499 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
1503 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
1505 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1506 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1507 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
1511 static void filter_mb_row(VP8Context *s, AVFrame *curframe, int mb_y)
1513 VP8FilterStrength *f = s->filter_strength;
1515 curframe->data[0] + 16*mb_y*s->linesize,
1516 curframe->data[1] + 8*mb_y*s->uvlinesize,
1517 curframe->data[2] + 8*mb_y*s->uvlinesize
1521 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
1522 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1523 filter_mb(s, dst, f++, mb_x, mb_y);
1530 static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y)
1532 VP8FilterStrength *f = s->filter_strength;
1533 uint8_t *dst = curframe->data[0] + 16*mb_y*s->linesize;
1536 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
1537 backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
1538 filter_mb_simple(s, dst, f++, mb_x, mb_y);
1543 static void release_queued_segmaps(VP8Context *s, int is_close)
1545 int leave_behind = is_close ? 0 : !s->maps_are_invalid;
1546 while (s->num_maps_to_be_freed > leave_behind)
1547 av_freep(&s->segmentation_maps[--s->num_maps_to_be_freed]);
1548 s->maps_are_invalid = 0;
1551 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
1554 VP8Context *s = avctx->priv_data;
1555 int ret, mb_x, mb_y, i, y, referenced;
1556 enum AVDiscard skip_thresh;
1557 AVFrame *av_uninit(curframe), *prev_frame = s->framep[VP56_FRAME_CURRENT];
1559 release_queued_segmaps(s, 0);
1561 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
1564 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
1565 || s->update_altref == VP56_FRAME_CURRENT;
1567 skip_thresh = !referenced ? AVDISCARD_NONREF :
1568 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
1570 if (avctx->skip_frame >= skip_thresh) {
1574 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
1576 // release no longer referenced frames
1577 for (i = 0; i < 5; i++)
1578 if (s->frames[i].data[0] &&
1579 &s->frames[i] != prev_frame &&
1580 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1581 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1582 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1583 vp8_release_frame(s, &s->frames[i], 0);
1585 // find a free buffer
1586 for (i = 0; i < 5; i++)
1587 if (&s->frames[i] != prev_frame &&
1588 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1589 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1590 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1591 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
1595 av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
1598 if (curframe->data[0])
1599 ff_thread_release_buffer(avctx, curframe);
1601 curframe->key_frame = s->keyframe;
1602 curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1603 curframe->reference = referenced ? 3 : 0;
1604 if ((ret = vp8_alloc_frame(s, curframe))) {
1605 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
1609 // check if golden and altref are swapped
1610 if (s->update_altref != VP56_FRAME_NONE) {
1611 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
1613 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
1615 if (s->update_golden != VP56_FRAME_NONE) {
1616 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
1618 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
1620 if (s->update_last) {
1621 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
1623 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
1625 s->next_framep[VP56_FRAME_CURRENT] = curframe;
1627 ff_thread_finish_setup(avctx);
1629 // Given that arithmetic probabilities are updated every frame, it's quite likely
1630 // that the values we have on a random interframe are complete junk if we didn't
1631 // start decode on a keyframe. So just don't display anything rather than junk.
1632 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1633 !s->framep[VP56_FRAME_GOLDEN] ||
1634 !s->framep[VP56_FRAME_GOLDEN2])) {
1635 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1636 return AVERROR_INVALIDDATA;
1639 s->linesize = curframe->linesize[0];
1640 s->uvlinesize = curframe->linesize[1];
1642 if (!s->edge_emu_buffer)
1643 s->edge_emu_buffer = av_malloc(21*s->linesize);
1645 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1647 /* Zero macroblock structures for top/top-left prediction from outside the frame. */
1648 memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
1650 // top edge of 127 for intra prediction
1651 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1652 s->top_border[0][15] = s->top_border[0][23] = 127;
1653 memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
1655 memset(s->ref_count, 0, sizeof(s->ref_count));
1657 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
1659 #define MARGIN (16 << 2)
1660 s->mv_min.y = -MARGIN;
1661 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
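    /* MV clamp bounds are in quarter-pel units (64 = one 16-pixel macroblock);
     * MARGIN lets vectors point up to 16 pixels outside the visible frame. */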
1663 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
1664 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
1665 VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
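        /* num_coeff_partitions is a power of two, so the mask above assigns
         * coefficient partitions to macroblock rows round-robin. The
         * macroblock array is laid out so that the MB above the current one
         * is mb[2] and the MB to its left is mb[-1] (cf. decode_mvs() and
         * decode_splitmvs()). */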
1666 int mb_xy = mb_y*s->mb_width;
1668 curframe->data[0] + 16*mb_y*s->linesize,
1669 curframe->data[1] + 8*mb_y*s->uvlinesize,
1670 curframe->data[2] + 8*mb_y*s->uvlinesize
1673 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
1674 memset(s->left_nnz, 0, sizeof(s->left_nnz));
1675 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1677 // left edge of 129 for intra prediction
1678 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1679 for (i = 0; i < 3; i++)
1680 for (y = 0; y < 16>>!!i; y++)
1681 dst[i][y*curframe->linesize[i]-1] = 129;
1682 if (mb_y == 1) // top left edge is also 129
1683 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
1686 s->mv_min.x = -MARGIN;
1687 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1688 if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
1689 ff_thread_await_progress(prev_frame, mb_y, 0);
1691 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1692 /* Prefetch the current frame, 4 MBs ahead */
1693 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1694 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1696 decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
1697 prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL);
1699 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
1702 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
1704 if (mb->mode <= MODE_I4x4)
1705 intra_predict(s, dst, mb, mb_x, mb_y);
1707 inter_predict(s, dst, mb, mb_x, mb_y);
1709 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
1712 idct_mb(s, dst, mb);
1714 AV_ZERO64(s->left_nnz);
1715 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
            // Reset the DC block predictors that would exist if the mb had coefficients
1718 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
1720 s->top_nnz[mb_x][8] = 0;
1724 if (s->deblock_filter)
1725 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);
1727 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
1735 if (s->deblock_filter) {
1736 if (s->filter.simple)
1737 filter_mb_row_simple(s, curframe, mb_y);
1739 filter_mb_row(s, curframe, mb_y);
1744 ff_thread_report_progress(curframe, mb_y, 0);
1747 ff_thread_report_progress(curframe, INT_MAX, 0);
1749 // if future frames don't use the updated probabilities,
1750 // reset them to the values we saved
1751 if (!s->update_probabilities)
1752 s->prob[0] = s->prob[1];
1754 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
1756 if (!s->invisible) {
1757 *(AVFrame*)data = *curframe;
1758 *data_size = sizeof(AVFrame);
1764 static av_cold int vp8_decode_init(AVCodecContext *avctx)
1766 VP8Context *s = avctx->priv_data;
1769 avctx->pix_fmt = PIX_FMT_YUV420P;
1771 dsputil_init(&s->dsp, avctx);
1772 ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8, 1);
1773 ff_vp8dsp_init(&s->vp8dsp);
1778 static av_cold int vp8_decode_free(AVCodecContext *avctx)
1780 vp8_decode_flush_impl(avctx, 0, 1);
1781 release_queued_segmaps(avctx->priv_data, 1);
1785 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
1787 VP8Context *s = avctx->priv_data;
1794 #define REBASE(pic) \
1795 pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
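/* REBASE translates a frame pointer from the source thread's context into the
 * corresponding entry of this thread's frames[] array. */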
1797 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
1799 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
1801 if (s->macroblocks_base &&
1802 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
1806 s->prob[0] = s_src->prob[!s_src->update_probabilities];
1807 s->segmentation = s_src->segmentation;
1808 s->lf_delta = s_src->lf_delta;
1809 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
1811 memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
1812 s->framep[0] = REBASE(s_src->next_framep[0]);
1813 s->framep[1] = REBASE(s_src->next_framep[1]);
1814 s->framep[2] = REBASE(s_src->next_framep[2]);
1815 s->framep[3] = REBASE(s_src->next_framep[3]);
1820 AVCodec ff_vp8_decoder = {
1822 .type = AVMEDIA_TYPE_VIDEO,
1824 .priv_data_size = sizeof(VP8Context),
1825 .init = vp8_decode_init,
1826 .close = vp8_decode_free,
1827 .decode = vp8_decode_frame,
1828 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
1829 .flush = vp8_decode_flush,
1830 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
1831 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
1832 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),