2 * VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Jason Garrett-Glaser
8 * This file is part of Libav.
10 * Libav is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * Libav is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with Libav; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "libavutil/imgutils.h"
29 #include "rectangle.h"
/* Release all reference frames and free per-frame context buffers.
 * Called on flush and before (re)allocating for new dimensions.
 * NOTE(review): this listing elides some lines (opening brace, `int i;`). */
35 static void vp8_decode_flush(AVCodecContext *avctx)
37 VP8Context *s = avctx->priv_data;
/* release the up-to-4 internal AVFrames still holding buffers */
40 for (i = 0; i < 4; i++)
41 if (s->frames[i].data[0])
42 avctx->release_buffer(avctx, &s->frames[i]);
43 memset(s->framep, 0, sizeof(s->framep));
45 av_freep(&s->macroblocks_base);
46 av_freep(&s->filter_strength);
47 av_freep(&s->intra4x4_pred_mode_top);
48 av_freep(&s->top_nnz);
49 av_freep(&s->edge_emu_buffer);
50 av_freep(&s->top_border);
51 av_freep(&s->segmentation_map);
/* macroblocks points into macroblocks_base (+1); clear it after the free */
53 s->macroblocks = NULL;
/* (Re)allocate all per-row/per-frame scratch buffers for a new coded size.
 * Frees everything first via vp8_decode_flush(), then allocates based on
 * the macroblock grid. Returns 0 on success, AVERROR on bad size / OOM. */
56 static int update_dimensions(VP8Context *s, int width, int height)
58 if (av_image_check_size(width, height, 0, s->avctx))
59 return AVERROR_INVALIDDATA;
61 vp8_decode_flush(s->avctx);
63 avcodec_set_dimensions(s->avctx, width, height);
/* round up to whole 16x16 macroblocks */
65 s->mb_width = (s->avctx->coded_width +15) / 16;
66 s->mb_height = (s->avctx->coded_height+15) / 16;
/* macroblocks_base holds one row plus left/top edge entries; the +1 and
   the later `macroblocks_base + 1` give a valid left-neighbour slot */
68 s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
69 s->filter_strength = av_mallocz(s->mb_width*sizeof(*s->filter_strength));
70 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
71 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
72 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
73 s->segmentation_map = av_mallocz(s->mb_width*s->mb_height);
/* all-or-nothing: any failed allocation aborts; flush() frees the rest later */
75 if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
76 !s->top_nnz || !s->top_border || !s->segmentation_map)
77 return AVERROR(ENOMEM);
79 s->macroblocks = s->macroblocks_base + 1;
/* Parse segmentation header (VP8 spec 9.3): per-segment quantizer and
 * filter-level deltas plus the tree probabilities for the segment map. */
84 static void parse_segment_info(VP8Context *s)
86 VP56RangeCoder *c = &s->c;
89 s->segmentation.update_map = vp8_rac_get(c);
91 if (vp8_rac_get(c)) { // update segment feature data
/* absolute_vals: values replace the base quant/filter instead of adding */
92 s->segmentation.absolute_vals = vp8_rac_get(c);
94 for (i = 0; i < 4; i++)
95 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
97 for (i = 0; i < 4; i++)
98 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
/* 255 means "use default probability" for that segment-id tree node */
100 if (s->segmentation.update_map)
101 for (i = 0; i < 3; i++)
102 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
/* Read per-reference-frame and per-mode loop-filter level deltas (spec 9.4). */
105 static void update_lf_deltas(VP8Context *s)
107 VP56RangeCoder *c = &s->c;
110 for (i = 0; i < 4; i++)
111 s->lf_delta.ref[i] = vp8_rac_get_sint(c, 6);
113 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++)
114 s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6);
/* Set up the 1/2/4/8 DCT coefficient partitions (spec 9.5). The sizes of
 * the first n-1 partitions are stored as 24-bit LE values at the start of
 * buf; the last partition takes whatever remains. Returns nonzero if a
 * partition size exceeds the available data. */
117 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
119 const uint8_t *sizes = buf;
122 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
/* skip past the size table itself */
124 buf += 3*(s->num_coeff_partitions-1);
125 buf_size -= 3*(s->num_coeff_partitions-1);
129 for (i = 0; i < s->num_coeff_partitions-1; i++) {
130 int size = AV_RL24(sizes + 3*i);
/* reject a declared size larger than the remaining buffer */
131 if (buf_size - size < 0)
134 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
/* last partition: everything left over */
138 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
/* Read the quantizer header (spec 9.6) and build dequant tables for each
 * of the 4 segments: a base AC index plus signed 4-bit deltas for luma DC,
 * second-order (Y2) DC/AC and chroma DC/AC. */
143 static void get_quants(VP8Context *s)
145 VP56RangeCoder *c = &s->c;
148 int yac_qi = vp8_rac_get_uint(c, 7);
149 int ydc_delta = vp8_rac_get_sint(c, 4);
150 int y2dc_delta = vp8_rac_get_sint(c, 4);
151 int y2ac_delta = vp8_rac_get_sint(c, 4);
152 int uvdc_delta = vp8_rac_get_sint(c, 4);
153 int uvac_delta = vp8_rac_get_sint(c, 4);
155 for (i = 0; i < 4; i++) {
156 if (s->segmentation.enabled) {
157 base_qi = s->segmentation.base_quant[i];
/* relative mode: segment value is a delta on top of yac_qi
   (the addition itself is on a line elided from this listing) */
158 if (!s->segmentation.absolute_vals)
/* indexes are clamped to the 0..127 lookup-table range */
163 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip(base_qi + ydc_delta , 0, 127)];
164 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip(base_qi , 0, 127)];
165 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip(base_qi + y2dc_delta, 0, 127)];
166 s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip(base_qi + y2ac_delta, 0, 127)] / 100;
167 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip(base_qi + uvdc_delta, 0, 127)];
168 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip(base_qi + uvac_delta, 0, 127)];
/* spec-mandated floors/ceilings for Y2 AC and chroma DC */
170 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
171 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
176 * Determine which buffers golden and altref should be updated with after this frame.
177 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
179 * Intra frames update all 3 references
180 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
181 * If the update (golden|altref) flag is set, it's updated with the current frame
182 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
183 * If the flag is not set, the number read means:
185 * 1: VP56_FRAME_PREVIOUS
186 * 2: update golden with altref, or update altref with golden
188 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
190 VP56RangeCoder *c = &s->c;
/* keyframe / explicit-update case (condition line elided in this listing) */
193 return VP56_FRAME_CURRENT;
/* otherwise a 2-bit code selects the source, per the comment above */
195 switch (vp8_rac_get_uint(c, 2)) {
197 return VP56_FRAME_PREVIOUS;
/* cross-copy: golden <- altref or altref <- golden */
199 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
201 return VP56_FRAME_NONE;
/* Read the golden/altref update flags and resolve what each reference
 * will be replaced with after this frame (see ref_to_update above). */
204 static void update_refs(VP8Context *s)
206 VP56RangeCoder *c = &s->c;
208 int update_golden = vp8_rac_get(c);
209 int update_altref = vp8_rac_get(c);
211 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
212 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
/* Parse the uncompressed frame tag plus the compressed frame header
 * (VP8 spec chapter 9): keyframe info, dimensions, segmentation, loop
 * filter, partitions, quantizers, reference updates and all probability
 * updates. Returns 0 on success or a negative AVERROR code. */
215 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
217 VP56RangeCoder *c = &s->c;
218 int header_size, hscale, vscale, i, j, k, l, m, ret;
219 int width = s->avctx->width;
220 int height = s->avctx->height;
/* 3-byte uncompressed frame tag */
222 s->keyframe = !(buf[0] & 1);
223 s->profile = (buf[0]>>1) & 7;
224 s->invisible = !(buf[0] & 0x10);
225 header_size = AV_RL24(buf) >> 5;
230 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
/* profile 0 uses the 6-tap epel filter, others bilinear */
233 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
234 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
235 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
/* keyframes carry 7 extra bytes (start code + dimensions) before data */
237 if (header_size > buf_size - 7*s->keyframe) {
238 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
239 return AVERROR_INVALIDDATA;
/* keyframe start code 0x9d012a (read little-endian) */
243 if (AV_RL24(buf) != 0x2a019d) {
244 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
245 return AVERROR_INVALIDDATA;
247 width = AV_RL16(buf+3) & 0x3fff;
248 height = AV_RL16(buf+5) & 0x3fff;
249 hscale = buf[4] >> 6;
250 vscale = buf[6] >> 6;
254 if (hscale || vscale)
255 av_log_missing_feature(s->avctx, "Upscaling", 1);
/* keyframe: reset all probabilities to spec defaults */
257 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
258 for (i = 0; i < 4; i++)
259 for (j = 0; j < 16; j++)
260 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
261 sizeof(s->prob->token[i][j]));
262 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
263 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
264 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
265 memset(&s->segmentation, 0, sizeof(s->segmentation));
268 if (!s->macroblocks_base || /* first frame */
269 width != s->avctx->width || height != s->avctx->height) {
/* BUG(review): misplaced parenthesis — this assigns the result of
   `update_dimensions(...) < 0` (0 or 1) to ret, not the return value.
   Should read: if ((ret = update_dimensions(s, width, height)) < 0) */
270 if ((ret = update_dimensions(s, width, height) < 0))
274 ff_vp56_init_range_decoder(c, buf, header_size);
276 buf_size -= header_size;
280 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
281 vp8_rac_get(c); // whether we can skip clamping in dsp functions
284 if ((s->segmentation.enabled = vp8_rac_get(c)))
285 parse_segment_info(s);
287 s->segmentation.update_map = 0; // FIXME: move this to some init function?
289 s->filter.simple = vp8_rac_get(c);
290 s->filter.level = vp8_rac_get_uint(c, 6);
291 s->filter.sharpness = vp8_rac_get_uint(c, 3);
293 if ((s->lf_delta.enabled = vp8_rac_get(c)))
297 if (setup_partitions(s, buf, buf_size)) {
298 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
299 return AVERROR_INVALIDDATA;
306 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
307 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
310 // if we aren't saving this frame's probabilities for future frames,
311 // make a copy of the current probabilities
312 if (!(s->update_probabilities = vp8_rac_get(c)))
313 s->prob[1] = s->prob[0];
315 s->update_last = s->keyframe || vp8_rac_get(c);
/* 13.4: token probability updates, fanned out over coefficient bands */
317 for (i = 0; i < 4; i++)
318 for (j = 0; j < 8; j++)
319 for (k = 0; k < 3; k++)
320 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
321 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
322 int prob = vp8_rac_get_uint(c, 8);
323 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
324 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
327 if ((s->mbskip_enabled = vp8_rac_get(c)))
328 s->prob->mbskip = vp8_rac_get_uint(c, 8);
/* inter frames only: intra/last/golden mode probabilities */
331 s->prob->intra = vp8_rac_get_uint(c, 8);
332 s->prob->last = vp8_rac_get_uint(c, 8);
333 s->prob->golden = vp8_rac_get_uint(c, 8);
336 for (i = 0; i < 4; i++)
337 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
339 for (i = 0; i < 3; i++)
340 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
342 // 17.2 MV probability update
343 for (i = 0; i < 2; i++)
344 for (j = 0; j < 19; j++)
345 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
346 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
/* Clamp a motion vector into the legal range for the current macroblock
 * (mv_min/mv_max are maintained per-row by the caller). */
352 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
354 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
355 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
359 * Motion vector coding, 17.1.
361 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
/* "large" mv: magnitude coded bitwise against probs p[9..] */
365 if (vp56_rac_get_prob_branchy(c, p[0])) {
368 for (i = 0; i < 3; i++)
369 x += vp56_rac_get_prob(c, p[9 + i]) << i;
/* high bits are read top-down */
370 for (i = 9; i > 3; i--)
371 x += vp56_rac_get_prob(c, p[9 + i]) << i;
/* bit 3 is implicit unless all higher bits are clear */
372 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
/* "small" mv: value 0..7 decoded with a tree over probs p[2..] */
376 const uint8_t *ps = p+2;
377 bit = vp56_rac_get_prob(c, *ps);
380 bit = vp56_rac_get_prob(c, *ps);
383 x += vp56_rac_get_prob(c, *ps);
/* sign bit, only coded for nonzero magnitudes */
386 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
/* Pick the sub-mv probability table based on whether the left/top
 * neighbouring sub-block mvs are zero (left/top are raw 32-bit mv words).
 * NOTE(review): the branching conditions are elided from this listing. */
389 static av_always_inline
390 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
393 return vp8_submv_prob[4-!!left];
395 return vp8_submv_prob[2];
396 return vp8_submv_prob[1-!!left];
400 * Split motion vector prediction, 16.4.
401 * @returns the number of motion vectors parsed (2, 4 or 16)
403 static av_always_inline
404 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
/* mb[2] / mb[-1] address the top / left neighbour macroblocks in the
   ring layout set up by update_dimensions */
408 VP8Macroblock *top_mb = &mb[2];
409 VP8Macroblock *left_mb = &mb[-1];
410 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
411 *mbsplits_top = vp8_mbsplits[top_mb->partitioning],
412 *mbsplits_cur, *firstidx;
413 VP56mv *top_mv = top_mb->bmv;
414 VP56mv *left_mv = left_mb->bmv;
415 VP56mv *cur_mv = mb->bmv;
/* partition type tree: 16x8 / 8x16 / 8x8 / 4x4 */
417 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
418 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
419 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
421 part_idx = VP8_SPLITMVMODE_8x8;
424 part_idx = VP8_SPLITMVMODE_4x4;
427 num = vp8_mbsplit_count[part_idx];
428 mbsplits_cur = vp8_mbsplits[part_idx],
429 firstidx = vp8_mbfirstidx[part_idx];
430 mb->partitioning = part_idx;
432 for (n = 0; n < num; n++) {
434 uint32_t left, above;
435 const uint8_t *submv_prob;
/* left/above context: neighbour MB's sub-mv on the edge, else a prior
   sub-mv of this MB (conditions for first column/row elided here) */
438 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
440 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
442 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
444 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
446 submv_prob = get_submv_prob(left, above);
/* sub-mv mode tree: NEW (read components) / ZERO / TOP / LEFT */
448 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
449 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
450 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
451 mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
452 mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
454 AV_ZERO32(&mb->bmv[n]);
457 AV_WN32A(&mb->bmv[n], above);
460 AV_WN32A(&mb->bmv[n], left);
/* Inter macroblock mv decoding, spec 16.2/16.3: build the near/nearest
 * candidate list from the top, left and top-left neighbours, then decode
 * the mv mode (ZERO/NEAREST/NEAR/NEW/SPLIT) and the mv itself. */
467 static av_always_inline
468 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
470 VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
472 mb + 1 /* top-left */ };
473 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
474 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
476 int cur_sign_bias = s->sign_bias[mb->ref_frame];
477 int8_t *sign_bias = s->sign_bias;
479 uint8_t cnt[4] = { 0 };
480 VP56RangeCoder *c = &s->c;
482 AV_ZERO32(&near_mv[0]);
483 AV_ZERO32(&near_mv[1]);
485 /* Process MB on top, left and top-left */
486 #define MV_EDGE_CHECK(n)\
488 VP8Macroblock *edge = mb_edge[n];\
489 int edge_ref = edge->ref_frame;\
490 if (edge_ref != VP56_FRAME_CURRENT) {\
491 uint32_t mv = AV_RN32A(&edge->mv);\
493 if (cur_sign_bias != sign_bias[edge_ref]) {\
494 /* SWAR negate of the values in mv. */\
496 mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
498 if (!n || mv != AV_RN32A(&near_mv[idx]))\
499 AV_WN32A(&near_mv[++idx], mv);\
500 cnt[idx] += 1 + (n != 2);\
502 cnt[CNT_ZERO] += 1 + (n != 2);\
510 mb->partitioning = VP8_SPLITMVMODE_NONE;
511 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
512 mb->mode = VP8_MVMODE_MV;
514 /* If we have three distinct MVs, merge first and last if they're the same */
515 if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
516 cnt[CNT_NEAREST] += 1;
518 /* Swap near and nearest if necessary */
519 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
520 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
521 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
524 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
525 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
527 /* Choose the best mv out of 0,0 and the nearest mv */
528 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
/* splitmv context: how many neighbours themselves used SPLIT */
529 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
530 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
531 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
533 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
534 mb->mode = VP8_MVMODE_SPLIT;
/* mb->mv becomes the last sub-mv, used for context of later MBs */
535 mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
537 mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
538 mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
542 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
546 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
550 mb->mode = VP8_MVMODE_ZERO;
/* Decode the 16 4x4 intra prediction modes of an I4x4 macroblock.
 * Keyframes use context (top/left neighbour modes, spec 11.5); inter
 * frames use a single fixed probability table. */
556 static av_always_inline
557 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
558 int mb_x, int keyframe)
560 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
563 uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x;
564 uint8_t* const left = s->intra4x4_pred_mode_left;
565 for (y = 0; y < 4; y++) {
566 for (x = 0; x < 4; x++) {
568 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
569 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
/* update prediction context for the next block/row */
570 left[y] = top[x] = *intra4x4;
/* inter-frame path: no context, flat probability table */
576 for (i = 0; i < 16; i++)
577 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
/* Decode per-macroblock mode info: segment id, skip flag, intra/inter
 * decision, prediction modes and (for inter) reference frame + mvs. */
581 static av_always_inline
582 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment)
584 VP56RangeCoder *c = &s->c;
586 if (s->segmentation.update_map)
587 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
588 s->segment = *segment;
590 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
/* keyframe path: fixed intra probability tables */
593 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
595 if (mb->mode == MODE_I4x4) {
596 decode_intra4x4_modes(s, c, mb_x, 1);
/* non-I4x4: replicate the 16x16 mode into the 4x4 context arrays */
598 const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
599 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
600 AV_WN32A(s->intra4x4_pred_mode_left, modes);
603 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
604 mb->ref_frame = VP56_FRAME_CURRENT;
/* inter macroblock: choose reference then decode mvs */
605 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
607 if (vp56_rac_get_prob_branchy(c, s->prob->last))
608 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
609 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
611 mb->ref_frame = VP56_FRAME_PREVIOUS;
/* ref_count feeds the prefetch heuristic in prefetch_motion() */
612 s->ref_count[mb->ref_frame-1]++;
614 // motion vectors, 16.3
615 decode_mvs(s, mb, mb_x, mb_y);
/* intra macroblock in an inter frame: adaptive probability tables */
618 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
620 if (mb->mode == MODE_I4x4)
621 decode_intra4x4_modes(s, c, mb_x, 0);
623 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
624 mb->ref_frame = VP56_FRAME_CURRENT;
625 mb->partitioning = VP8_SPLITMVMODE_NONE;
626 AV_ZERO32(&mb->bmv[0]);
630 #ifndef decode_block_coeffs_internal
632 * @param c arithmetic bitstream reader context
633 * @param block destination for block coefficients
634 * @param probs probabilities to use when reading trees from the bitstream
635 * @param i initial coeff index, 0 unless a separate DC block is coded
636 * @param zero_nhood the initial prediction context for number of surrounding
637 * all-zero blocks (only left/top, so 0-2)
638 * @param qmul array holding the dc/ac dequant factor at position 0/1
639 * @return 0 if no coeffs were decoded
640 * otherwise, the index of the last coeff decoded plus one
642 static int decode_block_coeffs_internal(VP56RangeCoder *c, DCTELEM block[16],
643 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
644 int i, uint8_t *token_prob, int16_t qmul[2])
/* token tree walk (spec 13.2); loop structure partially elided here */
649 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
653 if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
655 return i; // invalid input; blocks should end with EOB
/* context 0 after a zero coefficient */
656 token_prob = probs[i][0];
660 if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
662 token_prob = probs[i+1][1];
664 if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
665 coeff = vp56_rac_get_prob_branchy(c, token_prob[4]);
667 coeff += vp56_rac_get_prob(c, token_prob[5]);
671 if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
672 if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
673 coeff = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
676 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
677 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
679 } else { // DCT_CAT3 and up
680 int a = vp56_rac_get_prob(c, token_prob[8]);
681 int b = vp56_rac_get_prob(c, token_prob[9+a]);
682 int cat = (a<<1) + b;
683 coeff = 3 + (8<<cat);
684 coeff += vp8_rac_get_coeff(c, ff_vp8_dct_cat_prob[cat]);
/* context 2 after a coefficient > 1 */
687 token_prob = probs[i+1][2];
/* sign bit, dequantize (qmul[0] for DC at i==0, qmul[1] for AC), store
   in zigzag order */
689 block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
/* Thin wrapper: handle the common all-EOB case inline (first branch taken
 * means "no coefficients") before calling the full token decoder. */
696 static av_always_inline
697 int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
698 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
699 int i, int zero_nhood, int16_t qmul[2])
701 uint8_t *token_prob = probs[i][zero_nhood];
702 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
704 return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
/* Decode all coefficient blocks of one macroblock: optional Y2 (second-
 * order luma DC) block + inverse WHT, 16 luma 4x4 blocks, 8 chroma 4x4
 * blocks. t_nnz/l_nnz carry the top/left non-zero contexts. */
707 static av_always_inline
708 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
709 uint8_t t_nnz[9], uint8_t l_nnz[9])
711 int i, x, y, luma_start = 0, luma_ctx = 3;
712 int nnz_pred, nnz, nnz_total = 0;
713 int segment = s->segment;
/* modes other than I4x4/SPLIT code a separate Y2 DC block */
716 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
717 nnz_pred = t_nnz[8] + l_nnz[8];
719 // decode DC values and do hadamard
720 nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
721 s->qmat[segment].luma_dc_qmul);
722 l_nnz[8] = t_nnz[8] = !!nnz;
/* DC-only vs full inverse Walsh-Hadamard (selection condition elided) */
727 s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
729 s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
/* luma: with a Y2 block, AC decoding starts at coeff 1 and uses plane
   type 0 (luma_start/luma_ctx are updated on lines elided here) */
736 for (y = 0; y < 4; y++)
737 for (x = 0; x < 4; x++) {
738 nnz_pred = l_nnz[y] + t_nnz[x];
739 nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
740 nnz_pred, s->qmat[segment].luma_qmul);
741 // nnz+block_dc may be one more than the actual last index, but we don't care
742 s->non_zero_count_cache[y][x] = nnz + block_dc;
743 t_nnz[x] = l_nnz[y] = !!nnz;
748 // TODO: what to do about dimensions? 2nd dim for luma is x,
749 // but for chroma it's (y<<1)|x
750 for (i = 4; i < 6; i++)
751 for (y = 0; y < 2; y++)
752 for (x = 0; x < 2; x++) {
753 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
754 nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
755 nnz_pred, s->qmat[segment].chroma_qmul);
756 s->non_zero_count_cache[i][(y<<1)+x] = nnz;
757 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
761 // if there were no coded coeffs despite the macroblock not being marked skip,
762 // we MUST not do the inner loop filter and should not do IDCT
763 // Since skip isn't used for bitstream prediction, just manually set it.
/* Save the bottom pixel row of this macroblock (16 luma + 8+8 chroma
 * bytes) into top_border, for use as the top edge of the row below.
 * Simple loop filter skips the chroma copy (branch line elided here). */
768 static av_always_inline
769 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
770 int linesize, int uvlinesize, int simple)
772 AV_COPY128(top_border, src_y + 15*linesize);
774 AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
775 AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
/* Swap (xchg=1) or copy (xchg=0) the saved top-border row with the pixels
 * above the current macroblock, so intra prediction sees pre-loop-filter
 * edge pixels. Layout: 32 bytes per MB in top_border (16 Y, 8 Cb, 8 Cr). */
779 static av_always_inline
780 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
781 int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
782 int simple, int xchg)
784 uint8_t *top_border_m1 = top_border-32; // for TL prediction
786 src_cb -= uvlinesize;
787 src_cr -= uvlinesize;
789 #define XCHG(a,b,xchg) do { \
790 if (xchg) AV_SWAP64(b,a); \
791 else AV_COPY64(b,a); \
794 XCHG(top_border_m1+8, src_y-8, xchg);
795 XCHG(top_border, src_y, xchg);
/* right half / top-right are always swapped: needed by 4x4 TR prediction */
796 XCHG(top_border+8, src_y+8, 1);
797 if (mb_x < mb_width-1)
798 XCHG(top_border+32, src_y+16, 1);
800 // only copy chroma for normal loop filter
801 // or to initialize the top row to 127
802 if (!simple || !mb_y) {
803 XCHG(top_border_m1+16, src_cb-8, xchg);
804 XCHG(top_border_m1+24, src_cr-8, xchg);
805 XCHG(top_border+16, src_cb, 1);
806 XCHG(top_border+24, src_cr, 1);
/* Map DC_PRED8x8 to the edge-aware variant when top/left neighbours are
 * missing (frame border). NOTE(review): the mb_x guard lines are elided
 * from this listing — the two returns belong to different branches. */
810 static av_always_inline
811 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
814 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
816 return mb_y ? mode : LEFT_DC_PRED8x8;
/* Edge-aware fallback for TM (PLANE) 8x8/16x16 prediction at the frame
 * border. NOTE(review): the mb_x guard lines are elided from this listing. */
820 static av_always_inline
821 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
824 return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
826 return mb_y ? mode : HOR_PRED8x8;
/* Non-emu-edge variant: only DC prediction needs border special-casing
 * (other modes can read the padded picture edges directly). */
830 static av_always_inline
831 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
833 if (mode == DC_PRED8x8) {
834 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
/* CODEC_FLAG_EMU_EDGE variant: with no padded edges, every mode reading
 * above/left pixels needs a border fallback, not just DC.
 * NOTE(review): the switch/case labels for the first branches are elided. */
840 static av_always_inline
841 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
845 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
847 return !mb_y ? DC_127_PRED8x8 : mode;
849 return !mb_x ? DC_129_PRED8x8 : mode;
850 case PLANE_PRED8x8 /*TM*/:
851 return check_tm_pred8x8_mode(mode, mb_x, mb_y);
/* 4x4 analogue of check_tm_pred8x8_mode: TM fallback at frame borders.
 * NOTE(review): the mb_x guard lines are elided from this listing. */
856 static av_always_inline
857 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
860 return mb_y ? VERT_VP8_PRED : DC_129_PRED;
862 return mb_y ? mode : HOR_VP8_PRED;
/* Emu-edge border handling for 4x4 intra modes. Modes that need pixels a
 * plain predictor cannot safely fetch set *copy_buf, telling the caller
 * to predict into a stack buffer with manually assembled edges.
 * NOTE(review): several case labels and *copy_buf assignments are elided. */
866 static av_always_inline
867 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
876 case DIAG_DOWN_LEFT_PRED:
878 return !mb_y ? DC_127_PRED : mode;
886 return !mb_x ? DC_129_PRED : mode;
888 return check_tm_pred4x4_mode(mode, mb_x, mb_y);
889 case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
890 case DIAG_DOWN_RIGHT_PRED:
891 case VERT_RIGHT_PRED:
/* Perform intra prediction + IDCT reconstruction for one macroblock:
 * either a single 16x16 luma prediction, or 16 individual 4x4 predictions
 * (each followed by its residual add), then 8x8 chroma prediction.
 * Uses xchg_mb_border() to expose pre-loop-filter edge pixels. */
900 static av_always_inline
901 void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
904 AVCodecContext *avctx = s->avctx;
905 int x, y, mode, nnz, tr;
907 // for the first row, we need to run xchg_mb_border to init the top edge to 127
908 // otherwise, skip it if we aren't going to deblock
909 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
910 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
911 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
912 s->filter.simple, 1);
914 if (mb->mode < MODE_I4x4) {
915 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
916 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
918 mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
920 s->hpc.pred16x16[mode](dst[0], s->linesize);
/* I4x4 path: per-subblock prediction with top-right handling */
922 uint8_t *ptr = dst[0];
923 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
924 uint8_t tr_top[4] = { 127, 127, 127, 127 };
926 // all blocks on the right edge of the macroblock use bottom edge
927 // the top macroblock for their topright edge
928 uint8_t *tr_right = ptr - s->linesize + 16;
930 // if we're on the right edge of the frame, said edge is extended
931 // from the top macroblock
932 if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
933 mb_x == s->mb_width-1) {
934 tr = tr_right[-1]*0x01010101;
935 tr_right = (uint8_t *)&tr;
939 AV_ZERO128(s->non_zero_count_cache);
941 for (y = 0; y < 4; y++) {
942 uint8_t *topright = ptr + 4 - s->linesize;
943 for (x = 0; x < 4; x++) {
944 int copy = 0, linesize = s->linesize;
945 uint8_t *dst = ptr+4*x;
/* 5x8 scratch: 1 edge row + 4 prediction rows, 8-byte stride */
946 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
948 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
953 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
954 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
/* copy path: build the edge pixels into copy_dst by hand */
960 AV_WN32A(copy_dst+4, 127U * 0x01010101U);
962 AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
966 copy_dst[3] = ptr[4*x-s->linesize-1];
975 copy_dst[11] = ptr[4*x -1];
976 copy_dst[19] = ptr[4*x+s->linesize -1];
977 copy_dst[27] = ptr[4*x+s->linesize*2-1];
978 copy_dst[35] = ptr[4*x+s->linesize*3-1];
984 s->hpc.pred4x4[mode](dst, topright, linesize);
/* copy the 4 predicted rows back out of the scratch buffer */
986 AV_COPY32(ptr+4*x , copy_dst+12);
987 AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
988 AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
989 AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
/* add residual: DC-only fast path vs full IDCT */
992 nnz = s->non_zero_count_cache[y][x];
995 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
997 s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
1002 ptr += 4*s->linesize;
/* chroma 8x8 prediction, same mode for both planes */
1007 if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1008 mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode, mb_x, mb_y);
1010 mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x, mb_y);
1012 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1013 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
/* swap the border pixels back */
1015 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
1016 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1017 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1018 s->filter.simple, 0);
/* Per-subpel-phase MC edge requirements, indexed by the 3-bit fractional
 * mv component (row 0 doubles as the mc_func pointer index). */
1021 static const uint8_t subpel_idx[3][8] = {
1022 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1023 // also function pointer index
1024 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1025 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1029 * Generic MC function.
1031 * @param s VP8 decoding context
1032 * @param luma 1 for luma (Y) planes, 0 for chroma (Cb/Cr) planes
1033 * @param dst target buffer for block data at block position
1034 * @param src reference picture buffer at origin (0, 0)
1035 * @param mv motion vector (relative to block position) to get pixel data from
1036 * @param x_off horizontal position of block from origin (0, 0)
1037 * @param y_off vertical position of block from origin (0, 0)
1038 * @param block_w width of block (16, 8 or 4)
1039 * @param block_h height of block (always same as block_w)
1040 * @param width width of src/dst plane data
1041 * @param height height of src/dst plane data
1042 * @param linesize size of a single line of plane data, including padding
1043 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1045 static av_always_inline
1046 void vp8_mc_luma(VP8Context *s, uint8_t *dst, uint8_t *src, const VP56mv *mv,
1047 int x_off, int y_off, int block_w, int block_h,
1048 int width, int height, int linesize,
1049 vp8_mc_func mc_func[3][3])
/* luma mvs are in quarter-pel; <<1 converts to the 1/8-pel phase grid */
1053 int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1054 int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
1056 x_off += mv->x >> 2;
1057 y_off += mv->y >> 2;
1060 src += y_off * linesize + x_off;
/* fall back to emulated_edge_mc if the filter footprint leaves the plane */
1061 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1062 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1063 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
1064 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1065 x_off - mx_idx, y_off - my_idx, width, height);
1066 src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1068 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
/* full-pel mv: plain copy path (else-branch line elided in this listing) */
1070 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
/* Chroma MC: same scheme as vp8_mc_luma but operates on both Cb and Cr
 * at once; chroma mvs are already in 1/8-pel units (no <<1). */
1073 static av_always_inline
1074 void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, uint8_t *src1,
1075 uint8_t *src2, const VP56mv *mv, int x_off, int y_off,
1076 int block_w, int block_h, int width, int height, int linesize,
1077 vp8_mc_func mc_func[3][3])
1080 int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1081 int my = mv->y&7, my_idx = subpel_idx[0][my];
1083 x_off += mv->x >> 3;
1084 y_off += mv->y >> 3;
1087 src1 += y_off * linesize + x_off;
1088 src2 += y_off * linesize + x_off;
/* out-of-plane footprint: edge-emulate each plane separately, reusing
   the single edge_emu_buffer (hence MC of plane 1 before emulating 2) */
1089 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1090 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1091 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
1092 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1093 x_off - mx_idx, y_off - my_idx, width, height);
1094 src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1095 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1097 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
1098 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1099 x_off - mx_idx, y_off - my_idx, width, height);
1100 src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1101 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1103 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1104 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
/* full-pel path */
1107 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1108 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
/* MC for one rectangular partition of a macroblock: luma block at
 * (bx_off, by_off) plus the corresponding chroma blocks. The chroma mv
 * (uvmv, derived from *mv on lines elided here) is used at half
 * resolution; profile 3 rounds chroma mvs to full-pel. */
1112 static av_always_inline
1113 void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
1114 AVFrame *ref_frame, int x_off, int y_off,
1115 int bx_off, int by_off,
1116 int block_w, int block_h,
1117 int width, int height, VP56mv *mv)
1122 vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
1123 ref_frame->data[0], mv, x_off + bx_off, y_off + by_off,
1124 block_w, block_h, width, height, s->linesize,
1125 s->put_pixels_tab[block_w == 8]);
1128 if (s->profile == 3) {
/* chroma plane is half-size in both dimensions (4:2:0) */
1132 x_off >>= 1; y_off >>= 1;
1133 bx_off >>= 1; by_off >>= 1;
1134 width >>= 1; height >>= 1;
1135 block_w >>= 1; block_h >>= 1;
1136 vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
1137 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame->data[1],
1138 ref_frame->data[2], &uvmv, x_off + bx_off, y_off + by_off,
1139 block_w, block_h, width, height, s->uvlinesize,
1140 s->put_pixels_tab[1 + (block_w == 4)]);
1143 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1144 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
/* ref is a VP56_FRAME_* index (PREVIOUS/GOLDEN/GOLDEN2); s->ref_count[]
 * is indexed by ref-1 and counts how many MBs of this frame referenced
 * that picture so far. */
1145 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1147 /* Don't prefetch refs that haven't been used very often this frame. */
/* Threshold mb_xy>>5 grows as we advance through the frame, so rarely
 * used references stop being prefetched. */
1148 if (s->ref_count[ref-1] > (mb_xy >> 5)) {
1149 int x_off = mb_x << 4, y_off = mb_y << 4;
/* mv.x/y are quarter-pel; >>2 converts to whole pixels. The +8 centers
 * the luma prefetch horizontally within the 16-wide MB. */
1150 int mx = (mb->mv.x>>2) + x_off + 8;
1151 int my = (mb->mv.y>>2) + y_off;
1152 uint8_t **src= s->framep[ref]->data;
/* (mb_x&3)*4 staggers rows across consecutive MBs to cover the block
 * height over 4 iterations; +64 looks one cache line ahead. */
1153 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1154 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* Chroma: halved coordinates; src[2]-src[1] as "stride" fetches both
 * U and V planes with a single call. */
1155 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1156 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1161 * Apply motion vectors to prediction buffer, chapter 18.
/* Dispatch on the MB's MV partitioning mode and motion-compensate every
 * partition into dst[] (Y/U/V). For split modes the per-block MVs come
 * from mb->bmv[]; `break`s between cases are elided in this extract. */
1163 static av_always_inline
1164 void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
1167 int x_off = mb_x << 4, y_off = mb_y << 4;
1168 int width = 16*s->mb_width, height = 16*s->mb_height;
1169 AVFrame *ref = s->framep[mb->ref_frame];
1170 VP56mv *bmv = mb->bmv;
1172 switch (mb->partitioning) {
/* Whole 16x16 MB predicted with a single MV. */
1173 case VP8_SPLITMVMODE_NONE:
1174 vp8_mc_part(s, dst, ref, x_off, y_off,
1175 0, 0, 16, 16, width, height, &mb->mv);
/* Sixteen 4x4 luma blocks, each with its own MV. */
1177 case VP8_SPLITMVMODE_4x4: {
1182 for (y = 0; y < 4; y++) {
1183 for (x = 0; x < 4; x++) {
1184 vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
1185 ref->data[0], &bmv[4*y + x],
1186 4*x + x_off, 4*y + y_off, 4, 4,
1187 width, height, s->linesize,
1188 s->put_pixels_tab[2]);
/* Chroma: four 4x4 blocks; each chroma MV is the rounded average of
 * the four co-located luma MVs. */
1193 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1194 for (y = 0; y < 2; y++) {
1195 for (x = 0; x < 2; x++) {
1196 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1197 mb->bmv[ 2*y * 4 + 2*x+1].x +
1198 mb->bmv[(2*y+1) * 4 + 2*x ].x +
1199 mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1200 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1201 mb->bmv[ 2*y * 4 + 2*x+1].y +
1202 mb->bmv[(2*y+1) * 4 + 2*x ].y +
1203 mb->bmv[(2*y+1) * 4 + 2*x+1].y;
/* Divide the 4-MV sum by 4 with sign-aware rounding: the >>(INT_BIT-1)
 * term is -1 for negative sums, 0 otherwise, so negative values round
 * symmetrically to positive ones. */
1204 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1205 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
/* Profile 3 chroma-MV adjustment (body elided in this extract). */
1206 if (s->profile == 3) {
1210 vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
1211 dst[2] + 4*y*s->uvlinesize + x*4,
1212 ref->data[1], ref->data[2], &uvmv,
1213 4*x + x_off, 4*y + y_off, 4, 4,
1214 width, height, s->uvlinesize,
1215 s->put_pixels_tab[2]);
/* Two 16x8 halves, top then bottom. */
1220 case VP8_SPLITMVMODE_16x8:
1221 vp8_mc_part(s, dst, ref, x_off, y_off,
1222 0, 0, 16, 8, width, height, &bmv[0]);
1223 vp8_mc_part(s, dst, ref, x_off, y_off,
1224 0, 8, 16, 8, width, height, &bmv[1]);
/* Two 8x16 halves, left then right. */
1226 case VP8_SPLITMVMODE_8x16:
1227 vp8_mc_part(s, dst, ref, x_off, y_off,
1228 0, 0, 8, 16, width, height, &bmv[0]);
1229 vp8_mc_part(s, dst, ref, x_off, y_off,
1230 8, 0, 8, 16, width, height, &bmv[1]);
/* Four 8x8 quadrants, raster order. */
1232 case VP8_SPLITMVMODE_8x8:
1233 vp8_mc_part(s, dst, ref, x_off, y_off,
1234 0, 0, 8, 8, width, height, &bmv[0]);
1235 vp8_mc_part(s, dst, ref, x_off, y_off,
1236 8, 0, 8, 8, width, height, &bmv[1]);
1237 vp8_mc_part(s, dst, ref, x_off, y_off,
1238 0, 8, 8, 8, width, height, &bmv[2]);
1239 vp8_mc_part(s, dst, ref, x_off, y_off,
1240 8, 8, 8, 8, width, height, &bmv[3]);
/* Add the inverse-transformed residual to the predicted macroblock.
 * non_zero_count_cache packs one per-4x4-block coefficient count per
 * byte; AV_RL32 loads a row of 4 counts at once so a single mask test
 * selects between the per-block path and the DC-only add4 fast path. */
1245 static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
/* I4x4 blocks are reconstructed during intra prediction, so only other
 * modes need the full-MB residual pass here. */
1249 if (mb->mode != MODE_I4x4) {
1250 uint8_t *y_dst = dst[0];
1251 for (y = 0; y < 4; y++) {
1252 uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
/* Some block in this row has more than a DC coefficient (count > 1):
 * handle each of the four 4x4 blocks individually. */
1254 if (nnz4&~0x01010101) {
1255 for (x = 0; x < 4; x++) {
/* (uint8_t)nnz4 examines one count per iteration; the per-iteration
 * nnz4 >>= 8 shift is elided in this extract. */
1256 if ((uint8_t)nnz4 == 1)
1257 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
1258 else if((uint8_t)nnz4 > 1)
1259 s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
/* All four blocks are DC-only: one combined call (the nnz4 != 0 guard
 * preceding this is elided in the extract). */
1265 s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
1268 y_dst += 4*s->linesize;
/* Same scheme for the two chroma planes (2x2 blocks each). */
1272 for (ch = 0; ch < 2; ch++) {
1273 uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
1275 uint8_t *ch_dst = dst[1+ch];
1276 if (nnz4&~0x01010101) {
1277 for (y = 0; y < 2; y++) {
1278 for (x = 0; x < 2; x++) {
1279 if ((uint8_t)nnz4 == 1)
1280 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
1281 else if((uint8_t)nnz4 > 1)
1282 s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
1285 goto chroma_idct_end;
1287 ch_dst += 4*s->uvlinesize;
1290 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
/* Compute the loop-filter strength for one macroblock (VP8 spec ch. 15):
 * base level from segmentation or the frame header, adjusted by
 * per-reference / per-mode deltas, clipped to [0,63], then used to
 * derive the interior (inner-edge) limit. Results go into *f. */
1297 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
1299 int interior_limit, filter_level;
/* Segment-based level; absolute_vals selects replace-vs-delta semantics. */
1301 if (s->segmentation.enabled) {
1302 filter_level = s->segmentation.filter_level[s->segment];
1303 if (!s->segmentation.absolute_vals)
1304 filter_level += s->filter.level;
1306 filter_level = s->filter.level;
/* Loop-filter delta adjustment by reference frame and prediction mode. */
1308 if (s->lf_delta.enabled) {
1309 filter_level += s->lf_delta.ref[mb->ref_frame];
1310 filter_level += s->lf_delta.mode[mb->mode];
1313 /* Like av_clip for inputs 0 and max, where max is equal to (2^n-1) */
/* NOTE: the macro expansion deliberately ends with ';', so the use below
 * yields an extra empty statement -- harmless. */
1314 #define POW2CLIP(x,max) (((x) & ~max) ? (-(x))>>31 & max : (x));
1315 filter_level = POW2CLIP(filter_level, 63);
/* Sharpness shrinks the interior limit but it never drops below 1. */
1317 interior_limit = filter_level;
1318 if (s->filter.sharpness) {
1319 interior_limit >>= (s->filter.sharpness + 3) >> 2;
1320 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1322 interior_limit = FFMAX(interior_limit, 1);
1324 f->filter_level = filter_level;
1325 f->inner_limit = interior_limit;
/* Inner (sub-block) edges are filtered unless the MB was skipped and is
 * neither I4x4 nor split-MV (those always have sub-block edges). */
1326 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
/* Normal (non-simple) loop filter for one macroblock: horizontal passes
 * over the left MB edge and interior columns, then vertical passes over
 * the top MB edge and interior rows, for both luma and chroma. The
 * mb_x/mb_y guards and the filter_level==0 early-out are elided in this
 * extract. */
1329 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
1331 int mbedge_lim, bedge_lim, hev_thresh;
1332 int filter_level = f->filter_level;
1333 int inner_limit = f->inner_limit;
1334 int inner_filter = f->inner_filter;
1335 int linesize = s->linesize;
1336 int uvlinesize = s->uvlinesize;
/* High-edge-variance threshold, indexed [keyframe][filter_level];
 * rows are 64 entries (tail values elided in this extract). */
1337 static const uint8_t hev_thresh_lut[2][64] = {
1338 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1339 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1340 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1342 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1343 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1344 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* Edge limits per the spec: MB edges get a +4 stronger threshold than
 * interior (sub-block) edges. */
1351 bedge_lim = 2*filter_level + inner_limit;
1352 mbedge_lim = bedge_lim + 4;
1354 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
/* Left MB edge (horizontal filter), luma + chroma. */
1357 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
1358 mbedge_lim, inner_limit, hev_thresh);
1359 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
1360 mbedge_lim, inner_limit, hev_thresh);
/* Interior vertical edges at x = 4, 8, 12 (luma) and x = 4 (chroma). */
1364 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
1365 inner_limit, hev_thresh);
1366 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
1367 inner_limit, hev_thresh);
1368 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
1369 inner_limit, hev_thresh);
1370 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
1371 uvlinesize, bedge_lim,
1372 inner_limit, hev_thresh);
/* Top MB edge (vertical filter), luma + chroma. */
1376 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
1377 mbedge_lim, inner_limit, hev_thresh);
1378 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
1379 mbedge_lim, inner_limit, hev_thresh);
/* Interior horizontal edges at y = 4, 8, 12 (luma) and y = 4 (chroma). */
1383 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
1384 linesize, bedge_lim,
1385 inner_limit, hev_thresh);
1386 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
1387 linesize, bedge_lim,
1388 inner_limit, hev_thresh);
1389 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
1390 linesize, bedge_lim,
1391 inner_limit, hev_thresh);
1392 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
1393 dst[2] + 4 * uvlinesize,
1394 uvlinesize, bedge_lim,
1395 inner_limit, hev_thresh);
/* Simple loop filter for one macroblock: luma only, no high-edge-variance
 * handling. The mb_x/mb_y guards and filter_level==0 early-out are elided
 * in this extract. */
1399 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
1401 int mbedge_lim, bedge_lim;
1402 int filter_level = f->filter_level;
1403 int inner_limit = f->inner_limit;
1404 int inner_filter = f->inner_filter;
1405 int linesize = s->linesize;
/* MB edges get a +4 stronger threshold than interior sub-block edges. */
1410 bedge_lim = 2*filter_level + inner_limit;
1411 mbedge_lim = bedge_lim + 4;
/* Left MB edge, then interior vertical edges at x = 4, 8, 12. */
1414 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
1416 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1417 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1418 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
/* Top MB edge, then interior horizontal edges at y = 4, 8, 12. */
1422 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
1424 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1425 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1426 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
/* Run the normal loop filter over one macroblock row of the current
 * frame, using the per-MB strengths precomputed in s->filter_strength.
 * Before filtering each MB, its bottom pixels are saved to top_border
 * for intra prediction of the row below. */
1430 static void filter_mb_row(VP8Context *s, int mb_y)
1432 VP8FilterStrength *f = s->filter_strength;
/* dst[] initializer: Y/U/V pointers at the top of row mb_y (the
 * surrounding declaration lines are elided in this extract). */
1434 s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize,
1435 s->framep[VP56_FRAME_CURRENT]->data[1] + 8*mb_y*s->uvlinesize,
1436 s->framep[VP56_FRAME_CURRENT]->data[2] + 8*mb_y*s->uvlinesize
1440 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
1441 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1442 filter_mb(s, dst, f++, mb_x, mb_y);
/* Simple-filter counterpart of filter_mb_row(): luma plane only, so a
 * single dst pointer and no chroma border backup. */
1449 static void filter_mb_row_simple(VP8Context *s, int mb_y)
1451 VP8FilterStrength *f = s->filter_strength;
1452 uint8_t *dst = s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize;
1455 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
1456 backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
1457 filter_mb_simple(s, dst, f++, mb_x, mb_y);
/* Top-level frame decode entry point (AVCodec.decode callback).
 * Parses the frame header, picks/acquires an output AVFrame, decodes all
 * macroblock rows (mode, coefficients, prediction, IDCT, loop filter),
 * then updates the last/golden/altref reference slots and returns the
 * frame to the caller unless it is marked invisible. */
1462 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
1465 VP8Context *s = avctx->priv_data;
1466 int ret, mb_x, mb_y, i, y, referenced;
1467 enum AVDiscard skip_thresh;
1468 AVFrame *av_uninit(curframe);
1470 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
/* A frame is "referenced" if any reference slot will point at it. */
1473 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
1474 || s->update_altref == VP56_FRAME_CURRENT;
/* Map frame importance to the discard level at which it may be skipped. */
1476 skip_thresh = !referenced ? AVDISCARD_NONREF :
1477 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
1479 if (avctx->skip_frame >= skip_thresh) {
1483 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
/* Pick a frame slot not currently serving as a reference to decode into. */
1485 for (i = 0; i < 4; i++)
1486 if (&s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1487 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1488 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1489 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
1492 if (curframe->data[0])
1493 avctx->release_buffer(avctx, curframe);
1495 curframe->key_frame = s->keyframe;
1496 curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1497 curframe->reference = referenced ? 3 : 0;
1498 if ((ret = avctx->get_buffer(avctx, curframe))) {
1499 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
1503 // Given that arithmetic probabilities are updated every frame, it's quite likely
1504 // that the values we have on a random interframe are complete junk if we didn't
1505 // start decode on a keyframe. So just don't display anything rather than junk.
1506 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1507 !s->framep[VP56_FRAME_GOLDEN] ||
1508 !s->framep[VP56_FRAME_GOLDEN2])) {
1509 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1510 return AVERROR_INVALIDDATA;
1513 s->linesize = curframe->linesize[0];
1514 s->uvlinesize = curframe->linesize[1];
/* Lazily allocated: 21 lines is enough for a 16-tall block plus the
 * subpel filter taps used by emulated_edge_mc. */
1516 if (!s->edge_emu_buffer)
1517 s->edge_emu_buffer = av_malloc(21*s->linesize);
1519 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1521 /* Zero macroblock structures for top/top-left prediction from outside the frame. */
1522 memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
1524 // top edge of 127 for intra prediction
1525 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1526 s->top_border[0][15] = s->top_border[0][23] = 127;
1527 memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
1529 memset(s->ref_count, 0, sizeof(s->ref_count));
1531 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
/* MV clamping range: MARGIN extra units beyond the frame on each side. */
1533 #define MARGIN (16 << 2)
1534 s->mv_min.y = -MARGIN;
1535 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1537 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
/* Rows cycle through the coefficient partitions. */
1538 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
1539 VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1540 int mb_xy = mb_y*s->mb_width;
/* dst[] initializer: Y/U/V pointers at the top of this MB row (the
 * surrounding declaration lines are elided in this extract). */
1542 curframe->data[0] + 16*mb_y*s->linesize,
1543 curframe->data[1] + 8*mb_y*s->uvlinesize,
1544 curframe->data[2] + 8*mb_y*s->uvlinesize
/* Reset left-neighbor prediction context at the start of each row. */
1547 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
1548 memset(s->left_nnz, 0, sizeof(s->left_nnz));
1549 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1551 // left edge of 129 for intra prediction
1552 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1553 for (i = 0; i < 3; i++)
1554 for (y = 0; y < 16>>!!i; y++)
1555 dst[i][y*curframe->linesize[i]-1] = 129;
1556 if (mb_y == 1) // top left edge is also 129
1557 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
1560 s->mv_min.x = -MARGIN;
1561 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1563 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1564 /* Prefetch the current frame, 4 MBs ahead */
1565 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1566 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1568 decode_mb_mode(s, mb, mb_x, mb_y, s->segmentation_map + mb_xy);
1570 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
1573 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
/* Intra modes sort below MODE_I4x4 in the mode enum. */
1575 if (mb->mode <= MODE_I4x4)
1576 intra_predict(s, dst, mb, mb_x, mb_y);
1578 inter_predict(s, dst, mb, mb_x, mb_y);
1580 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
1583 idct_mb(s, dst, mb);
/* Skipped MB: clear the nnz context instead (else-branch pairing with
 * the idct_mb call; the intervening lines are elided in this extract). */
1585 AV_ZERO64(s->left_nnz);
1586 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
1588 // Reset DC block predictors if they would exist if the mb had coefficients
1589 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
1591 s->top_nnz[mb_x][8] = 0;
1595 if (s->deblock_filter)
1596 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);
1598 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
/* Loop-filter the finished row. */
1606 if (s->deblock_filter) {
1607 if (s->filter.simple)
1608 filter_mb_row_simple(s, mb_y);
1610 filter_mb_row(s, mb_y);
1617 // if future frames don't use the updated probabilities,
1618 // reset them to the values we saved
1619 if (!s->update_probabilities)
1620 s->prob[0] = s->prob[1];
1622 // check if golden and altref are swapped
1623 if (s->update_altref == VP56_FRAME_GOLDEN &&
1624 s->update_golden == VP56_FRAME_GOLDEN2)
1625 FFSWAP(AVFrame *, s->framep[VP56_FRAME_GOLDEN], s->framep[VP56_FRAME_GOLDEN2]);
1627 if (s->update_altref != VP56_FRAME_NONE)
1628 s->framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
1630 if (s->update_golden != VP56_FRAME_NONE)
1631 s->framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
1634 if (s->update_last) // move cur->prev
1635 s->framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_CURRENT];
1637 // release no longer referenced frames
1638 for (i = 0; i < 4; i++)
1639 if (s->frames[i].data[0] &&
1640 &s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
1641 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1642 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1643 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1644 avctx->release_buffer(avctx, &s->frames[i]);
/* Invisible frames update references but are not output to the caller. */
1646 if (!s->invisible) {
1647 *(AVFrame*)data = *s->framep[VP56_FRAME_CURRENT];
1648 *data_size = sizeof(AVFrame);
/* Decoder init (AVCodec.init callback): fixes the output pixel format to
 * YUV 4:2:0 and initializes the DSP, H.264-shared intra prediction, and
 * VP8 DSP function tables. */
1654 static av_cold int vp8_decode_init(AVCodecContext *avctx)
1656 VP8Context *s = avctx->priv_data;
1659 avctx->pix_fmt = PIX_FMT_YUV420P;
1661 dsputil_init(&s->dsp, avctx);
/* VP8 reuses the H.264 intra prediction code, keyed by codec id. */
1662 ff_h264_pred_init(&s->hpc, CODEC_ID_VP8);
1663 ff_vp8dsp_init(&s->vp8dsp);
/* Decoder teardown (AVCodec.close callback): vp8_decode_flush() releases
 * all frame buffers and frees the per-dimension context allocations. */
1668 static av_cold int vp8_decode_free(AVCodecContext *avctx)
1670 vp8_decode_flush(avctx);
1674 AVCodec ff_vp8_decoder = {
1684 .flush = vp8_decode_flush,
1685 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),