2 * VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Jason Garrett-Glaser
8 * This file is part of FFmpeg.
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "libavutil/imgutils.h"
29 #include "rectangle.h"
// Free every per-dimension scratch buffer owned by the context.
// s->macroblocks is only an offset pointer into macroblocks_base
// (see update_dimensions), so it is cleared rather than freed.
36 static void free_buffers(VP8Context *s)
38 av_freep(&s->macroblocks_base);
39 av_freep(&s->filter_strength);
40 av_freep(&s->intra4x4_pred_mode_top);
41 av_freep(&s->top_nnz);
42 av_freep(&s->edge_emu_buffer);
43 av_freep(&s->top_border);
44 av_freep(&s->segmentation_map);
46 s->macroblocks = NULL;
// Flush callback: release all 5 internal frame buffers and reset the
// frame-pointer table. Frame-thread copies (avctx->is_copy) do not own
// the buffers, so they must not release them.
49 static void vp8_decode_flush(AVCodecContext *avctx)
51 VP8Context *s = avctx->priv_data;
54 if (!avctx->is_copy) {
55 for (i = 0; i < 5; i++)
56 if (s->frames[i].data[0])
57 ff_thread_release_buffer(avctx, &s->frames[i]);
59 memset(s->framep, 0, sizeof(s->framep));
// (Re)allocate all per-macroblock-row scratch arrays for a new coded size.
// Returns 0 on success, AVERROR_INVALIDDATA for bogus dimensions, or
// AVERROR(ENOMEM) if any allocation failed.
64 static int update_dimensions(VP8Context *s, int width, int height)
66 if (width != s->avctx->width ||
67 height != s->avctx->height) {
68 if (av_image_check_size(width, height, 0, s->avctx))
69 return AVERROR_INVALIDDATA;
// Dimensions changed: drop all reference frames before resizing.
71 vp8_decode_flush(s->avctx);
73 avcodec_set_dimensions(s->avctx, width, height);
76 s->mb_width = (s->avctx->coded_width +15) / 16;
77 s->mb_height = (s->avctx->coded_height+15) / 16;
// NOTE(review): macroblocks_base holds mb_width + 2*mb_height + 1 entries;
// the extra entries presumably serve as left/top edge context — confirm
// against the (elided) layout comment in the full file.
79 s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
80 s->filter_strength = av_mallocz(s->mb_width*sizeof(*s->filter_strength));
81 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
82 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
83 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
84 s->segmentation_map = av_mallocz(s->mb_width*s->mb_height);
86 if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
87 !s->top_nnz || !s->top_border || !s->segmentation_map)
88 return AVERROR(ENOMEM);
// Offset by one so macroblocks[-1] (left-edge context) is valid.
90 s->macroblocks = s->macroblocks_base + 1;
// Parse the segmentation header (spec section 9.3): per-segment quantizer
// and loop-filter deltas, plus the tree probabilities used to decode the
// per-MB segment id when update_map is set.
95 static void parse_segment_info(VP8Context *s)
97 VP56RangeCoder *c = &s->c;
100 s->segmentation.update_map = vp8_rac_get(c);
102 if (vp8_rac_get(c)) { // update segment feature data
// absolute_vals: base_quant/filter_level are absolute values rather
// than deltas against the frame-level settings.
103 s->segmentation.absolute_vals = vp8_rac_get(c);
105 for (i = 0; i < 4; i++)
106 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
108 for (i = 0; i < 4; i++)
109 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
111 if (s->segmentation.update_map)
112 for (i = 0; i < 3; i++)
// 255 marks "probability not coded in the bitstream".
113 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
// Read the loop-filter delta values: one signed delta per reference frame
// and one per prediction mode (MODE_I4x4 .. VP8_MVMODE_SPLIT).
116 static void update_lf_deltas(VP8Context *s)
118 VP56RangeCoder *c = &s->c;
121 for (i = 0; i < 4; i++)
122 s->lf_delta.ref[i] = vp8_rac_get_sint(c, 6);
124 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++)
125 s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6);
// Initialize the 1/2/4/8 DCT-coefficient partitions. The sizes of all but
// the last partition are stored as 24-bit LE values at the start of the
// data; the last partition implicitly takes the remaining bytes.
// Returns non-zero on a partition size exceeding the buffer.
128 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
130 const uint8_t *sizes = buf;
133 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
// Skip past the size table itself.
135 buf += 3*(s->num_coeff_partitions-1);
136 buf_size -= 3*(s->num_coeff_partitions-1);
140 for (i = 0; i < s->num_coeff_partitions-1; i++) {
141 int size = AV_RL24(sizes + 3*i);
// NOTE(review): the error-return statement after this bounds check is
// elided in this chunk.
142 if (buf_size - size < 0)
145 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
// Final partition: whatever is left of the buffer.
149 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
// Derive the per-segment dequantization factors (spec section 9.6).
// A base index plus five signed deltas select entries from the DC/AC
// lookup tables; Y2 (luma DC) values get the spec's 2x / 155/100 scaling.
154 static void get_quants(VP8Context *s)
156 VP56RangeCoder *c = &s->c;
159 int yac_qi = vp8_rac_get_uint(c, 7);
160 int ydc_delta = vp8_rac_get_sint(c, 4);
161 int y2dc_delta = vp8_rac_get_sint(c, 4);
162 int y2ac_delta = vp8_rac_get_sint(c, 4);
163 int uvdc_delta = vp8_rac_get_sint(c, 4);
164 int uvac_delta = vp8_rac_get_sint(c, 4);
166 for (i = 0; i < 4; i++) {
167 if (s->segmentation.enabled) {
168 base_qi = s->segmentation.base_quant[i];
// Relative mode: segment value is a delta on top of yac_qi
// (the adjustment statement itself is elided in this chunk).
169 if (!s->segmentation.absolute_vals)
// Indices are clipped to 0..127 before the table lookup.
174 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
175 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)];
176 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
177 s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100;
178 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
179 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
// Spec-mandated floors/ceilings for the Y2 AC and chroma DC factors.
181 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
182 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
187 * Determine which buffers golden and altref should be updated with after this frame.
188 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
190 * Intra frames update all 3 references
191 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
192 * If the update (golden|altref) flag is set, it's updated with the current frame
193 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
194 * If the flag is not set, the number read means:
196 * 1: VP56_FRAME_PREVIOUS
197 * 2: update golden with altref, or update altref with golden
199 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
201 VP56RangeCoder *c = &s->c;
// Flag set (or keyframe, per the elided condition): copy the current frame.
204 return VP56_FRAME_CURRENT;
// Flag unset: a 2-bit code selects the source (0 = no update, see below).
206 switch (vp8_rac_get_uint(c, 2)) {
208 return VP56_FRAME_PREVIOUS;
// Cross-copy: golden takes altref, altref takes golden.
210 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
212 return VP56_FRAME_NONE;
// Read the golden/altref refresh flags and resolve each to the frame
// that should be copied into it (see ref_to_update above).
215 static void update_refs(VP8Context *s)
217 VP56RangeCoder *c = &s->c;
219 int update_golden = vp8_rac_get(c);
220 int update_altref = vp8_rac_get(c);
222 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
223 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
// Parse the uncompressed frame tag plus the "first partition" compressed
// header (spec sections 9.1-9.11): frame type, dimensions (keyframes only),
// segmentation, loop filter, partitions, quantizers, reference updates and
// all probability updates. Returns 0 or a negative AVERROR code.
226 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
228 VP56RangeCoder *c = &s->c;
229 int header_size, hscale, vscale, i, j, k, l, m, ret;
230 int width = s->avctx->width;
231 int height = s->avctx->height;
// 3-byte uncompressed frame tag: keyframe bit, profile, show_frame bit,
// and the 19-bit first-partition size.
233 s->keyframe = !(buf[0] & 1);
234 s->profile = (buf[0]>>1) & 7;
235 s->invisible = !(buf[0] & 0x10);
236 header_size = AV_RL24(buf) >> 5;
241 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
// Profile 0 uses the 6-tap filter; profiles 1-3 use bilinear MC.
244 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
245 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
246 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
248 if (header_size > buf_size - 7*s->keyframe) {
249 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
250 return AVERROR_INVALIDDATA;
// Keyframe-only: start code, 14-bit dimensions and scaling fields.
254 if (AV_RL24(buf) != 0x2a019d) {
255 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
256 return AVERROR_INVALIDDATA;
258 width = AV_RL16(buf+3) & 0x3fff;
259 height = AV_RL16(buf+5) & 0x3fff;
260 hscale = buf[4] >> 6;
261 vscale = buf[6] >> 6;
265 if (hscale || vscale)
266 av_log_missing_feature(s->avctx, "Upscaling", 1);
// Keyframes reset all entropy contexts to the spec defaults.
268 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
269 for (i = 0; i < 4; i++)
270 for (j = 0; j < 16; j++)
271 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
272 sizeof(s->prob->token[i][j]));
273 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
274 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
275 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
276 memset(&s->segmentation, 0, sizeof(s->segmentation));
279 if (!s->macroblocks_base || /* first frame */
280 width != s->avctx->width || height != s->avctx->height) {
281 if ((ret = update_dimensions(s, width, height)) < 0)
// Start the range decoder on the compressed header partition.
285 ff_vp56_init_range_decoder(c, buf, header_size);
287 buf_size -= header_size;
291 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
292 vp8_rac_get(c); // whether we can skip clamping in dsp functions
295 if ((s->segmentation.enabled = vp8_rac_get(c)))
296 parse_segment_info(s);
298 s->segmentation.update_map = 0; // FIXME: move this to some init function?
300 s->filter.simple = vp8_rac_get(c);
301 s->filter.level = vp8_rac_get_uint(c, 6);
302 s->filter.sharpness = vp8_rac_get_uint(c, 3);
304 if ((s->lf_delta.enabled = vp8_rac_get(c)))
308 if (setup_partitions(s, buf, buf_size)) {
309 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
310 return AVERROR_INVALIDDATA;
317 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
318 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
321 // if we aren't saving this frame's probabilities for future frames,
322 // make a copy of the current probabilities
323 if (!(s->update_probabilities = vp8_rac_get(c)))
324 s->prob[1] = s->prob[0];
326 s->update_last = s->keyframe || vp8_rac_get(c);
// Token probability updates (spec 13.4): each updated value is fanned
// out to all coefficient positions sharing the same band.
328 for (i = 0; i < 4; i++)
329 for (j = 0; j < 8; j++)
330 for (k = 0; k < 3; k++)
331 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
332 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
333 int prob = vp8_rac_get_uint(c, 8);
334 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
335 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
338 if ((s->mbskip_enabled = vp8_rac_get(c)))
339 s->prob->mbskip = vp8_rac_get_uint(c, 8);
// Inter-frame-only probabilities (the enclosing keyframe check is elided).
342 s->prob->intra = vp8_rac_get_uint(c, 8);
343 s->prob->last = vp8_rac_get_uint(c, 8);
344 s->prob->golden = vp8_rac_get_uint(c, 8);
347 for (i = 0; i < 4; i++)
348 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
350 for (i = 0; i < 3; i++)
351 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
353 // 17.2 MV probability update
354 for (i = 0; i < 2; i++)
355 for (j = 0; j < 19; j++)
356 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
357 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
// Clamp a motion vector into the valid range for the current macroblock
// (s->mv_min/mv_max are maintained by the caller per MB position).
363 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
365 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
366 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
370 * Motion vector coding, 17.1.
372 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
// p[0]: is_short flag; the long branch reads magnitude bits 0-2 then 9-4,
// with bit 3 coded conditionally, as per the spec's long-vector layout.
376 if (vp56_rac_get_prob_branchy(c, p[0])) {
379 for (i = 0; i < 3; i++)
380 x += vp56_rac_get_prob(c, p[9 + i]) << i;
381 for (i = 9; i > 3; i--)
382 x += vp56_rac_get_prob(c, p[9 + i]) << i;
// Bit 3 is implicit (1) unless some higher bit is set and p[12] says 0.
383 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
// Short branch: a small tree coded with probabilities starting at p[2]
// (some tree-walk statements are elided in this chunk).
387 const uint8_t *ps = p+2;
388 bit = vp56_rac_get_prob(c, *ps);
391 bit = vp56_rac_get_prob(c, *ps);
394 x += vp56_rac_get_prob(c, *ps);
// p[1]: sign (only read when the magnitude is non-zero).
397 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
// Pick the sub-MV probability set from whether the left/top neighbor
// block motion vectors match this block's context (left/top are raw
// 32-bit MV loads; the comparison conditions are elided in this chunk).
400 static av_always_inline
401 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
404 return vp8_submv_prob[4-!!left];
406 return vp8_submv_prob[2];
407 return vp8_submv_prob[1-!!left];
411 * Split motion vector prediction, 16.4.
412 * @returns the number of motion vectors parsed (2, 4 or 16)
414 static av_always_inline
415 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
// mb[2] / mb[-1] address the top and left neighbor macroblocks in the
// context array laid out by update_dimensions.
419 VP8Macroblock *top_mb = &mb[2];
420 VP8Macroblock *left_mb = &mb[-1];
421 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
422 *mbsplits_top = vp8_mbsplits[top_mb->partitioning],
423 *mbsplits_cur, *firstidx;
424 VP56mv *top_mv = top_mb->bmv;
425 VP56mv *left_mv = left_mb->bmv;
426 VP56mv *cur_mv = mb->bmv;
// Partition type tree: 16x8 / 8x16 / 8x8 / 4x4.
428 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
429 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
430 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
432 part_idx = VP8_SPLITMVMODE_8x8;
435 part_idx = VP8_SPLITMVMODE_4x4;
438 num = vp8_mbsplit_count[part_idx];
439 mbsplits_cur = vp8_mbsplits[part_idx],
440 firstidx = vp8_mbfirstidx[part_idx];
441 mb->partitioning = part_idx;
443 for (n = 0; n < num; n++) {
445 uint32_t left, above;
446 const uint8_t *submv_prob;
// Left context: neighbor MB's right column, or the sub-MV to our left.
449 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
451 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
// Above context: neighbor MB's bottom row, or the sub-MV above ours.
453 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
455 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
457 submv_prob = get_submv_prob(left, above);
// Sub-MV tree: NEW (read components) / ZERO / ABOVE / LEFT.
459 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
460 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
461 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
462 mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
463 mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
465 AV_ZERO32(&mb->bmv[n]);
468 AV_WN32A(&mb->bmv[n], above);
471 AV_WN32A(&mb->bmv[n], left);
// Inter MB motion vector decoding (spec 16.2/16.3): survey the top, left
// and top-left neighbors to build the near/nearest candidates and their
// counts, then decode the MV mode (ZERO/NEAREST/NEAR/NEW/SPLIT).
478 static av_always_inline
479 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
481 VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
483 mb + 1 /* top-left */ };
484 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
485 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
487 int cur_sign_bias = s->sign_bias[mb->ref_frame];
488 int8_t *sign_bias = s->sign_bias;
490 uint8_t cnt[4] = { 0 };
491 VP56RangeCoder *c = &s->c;
493 AV_ZERO32(&near_mv[0]);
494 AV_ZERO32(&near_mv[1]);
495 AV_ZERO32(&near_mv[2]);
497 /* Process MB on top, left and top-left */
498 #define MV_EDGE_CHECK(n)\
500 VP8Macroblock *edge = mb_edge[n];\
501 int edge_ref = edge->ref_frame;\
502 if (edge_ref != VP56_FRAME_CURRENT) {\
503 uint32_t mv = AV_RN32A(&edge->mv);\
505 if (cur_sign_bias != sign_bias[edge_ref]) {\
506 /* SWAR negate of the values in mv. */\
508 mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
510 if (!n || mv != AV_RN32A(&near_mv[idx]))\
511 AV_WN32A(&near_mv[++idx], mv);\
512 cnt[idx] += 1 + (n != 2);\
514 cnt[CNT_ZERO] += 1 + (n != 2);\
522 mb->partitioning = VP8_SPLITMVMODE_NONE;
523 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
524 mb->mode = VP8_MVMODE_MV;
526 /* If we have three distinct MVs, merge first and last if they're the same */
527 if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
528 cnt[CNT_NEAREST] += 1;
530 /* Swap near and nearest if necessary */
531 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
532 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
533 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
536 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
537 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
539 /* Choose the best mv out of 0,0 and the nearest mv */
540 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
// cnt[CNT_SPLITMV] is repurposed here as the split-mode context,
// weighting neighbors that themselves used SPLIT.
541 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
542 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
543 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
545 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
546 mb->mode = VP8_MVMODE_SPLIT;
// The MB-level mv becomes the last decoded sub-MV.
547 mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
549 mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
550 mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
554 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
558 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
562 mb->mode = VP8_MVMODE_ZERO;
// Decode the 16 B_PRED sub-block modes. Keyframes use context-dependent
// probabilities from the top/left neighbor modes; inter frames use a
// single fixed probability set.
568 static av_always_inline
569 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
570 int mb_x, int keyframe)
572 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
575 uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x;
576 uint8_t* const left = s->intra4x4_pred_mode_left;
577 for (y = 0; y < 4; y++) {
578 for (x = 0; x < 4; x++) {
580 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
581 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
// Decoded mode becomes the context for the blocks below/right.
582 left[y] = top[x] = *intra4x4;
588 for (i = 0; i < 16; i++)
589 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
// Decode per-macroblock mode info: segment id, skip flag, intra/inter
// decision, prediction modes and (for inter MBs) reference frame + MVs.
593 static av_always_inline
594 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment, uint8_t *ref)
596 VP56RangeCoder *c = &s->c;
598 if (s->segmentation.update_map)
599 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
// Otherwise reuse the ref map's value (frame-threading) or keep the old one.
601 *segment = ref ? *ref : *segment;
602 s->segment = *segment;
604 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
// Keyframe intra path (the enclosing keyframe condition is elided).
607 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
609 if (mb->mode == MODE_I4x4) {
610 decode_intra4x4_modes(s, c, mb_x, 1);
// Non-I4x4 intra: seed the 4x4 context rows with the 16x16 mode.
612 const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
613 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
614 AV_WN32A(s->intra4x4_pred_mode_left, modes);
617 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
618 mb->ref_frame = VP56_FRAME_CURRENT;
619 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
// Inter MB: pick the reference frame (last/golden/altref).
621 if (vp56_rac_get_prob_branchy(c, s->prob->last))
622 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
623 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
625 mb->ref_frame = VP56_FRAME_PREVIOUS;
626 s->ref_count[mb->ref_frame-1]++;
628 // motion vectors, 16.3
629 decode_mvs(s, mb, mb_x, mb_y);
// Intra MB in an inter frame: frame-adaptive mode probabilities.
632 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
634 if (mb->mode == MODE_I4x4)
635 decode_intra4x4_modes(s, c, mb_x, 0);
637 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
638 mb->ref_frame = VP56_FRAME_CURRENT;
639 mb->partitioning = VP8_SPLITMVMODE_NONE;
640 AV_ZERO32(&mb->bmv[0]);
644 #ifndef decode_block_coeffs_internal
646 * @param c arithmetic bitstream reader context
647 * @param block destination for block coefficients
648 * @param probs probabilities to use when reading trees from the bitstream
649 * @param i initial coeff index, 0 unless a separate DC block is coded
650 * @param qmul array holding the dc/ac dequant factor at position 0/1
651 * @return 0 if no coeffs were decoded
652 * otherwise, the index of the last coeff decoded plus one
654 static int decode_block_coeffs_internal(VP56RangeCoder *c, DCTELEM block[16],
655 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
656 int i, uint8_t *token_prob, int16_t qmul[2])
// Token tree walk per coefficient position (spec 13.2/13.3). The loop
// construct wrapping this body is elided in this chunk.
661 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
665 if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
667 return i; // invalid input; blocks should end with EOB
// After a zero, next position uses the "last was zero" context (0).
668 token_prob = probs[i][0];
672 if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
674 token_prob = probs[i+1][1];
676 if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
677 coeff = vp56_rac_get_prob_branchy(c, token_prob[4]);
679 coeff += vp56_rac_get_prob(c, token_prob[5]);
683 if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
684 if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
685 coeff = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
688 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
689 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
691 } else { // DCT_CAT3 and up
692 int a = vp56_rac_get_prob(c, token_prob[8]);
693 int b = vp56_rac_get_prob(c, token_prob[9+a]);
694 int cat = (a<<1) + b;
695 coeff = 3 + (8<<cat);
696 coeff += vp8_rac_get_coeff(c, ff_vp8_dct_cat_prob[cat]);
// Large coefficient: next position uses the ">1" context (2).
699 token_prob = probs[i+1][2];
// Sign bit, dequantize (DC factor at index 0, AC otherwise), store in
// zigzag order.
701 block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
709 * @param c arithmetic bitstream reader context
710 * @param block destination for block coefficients
711 * @param probs probabilities to use when reading trees from the bitstream
712 * @param i initial coeff index, 0 unless a separate DC block is coded
713 * @param zero_nhood the initial prediction context for number of surrounding
714 * all-zero blocks (only left/top, so 0-2)
715 * @param qmul array holding the dc/ac dequant factor at position 0/1
716 * @return 0 if no coeffs were decoded
717 * otherwise, the index of the last coeff decoded plus one
719 static av_always_inline
720 int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
721 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
722 int i, int zero_nhood, int16_t qmul[2])
// Fast path: check the first EOB here so fully-empty blocks avoid the
// (possibly arch-optimized, see #ifndef above) internal decoder call.
724 uint8_t *token_prob = probs[i][zero_nhood];
725 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
727 return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
// Decode all residual coefficients for one macroblock: optional separate
// luma DC (Y2) block + WHT, 16 luma 4x4 blocks, and 2x4 chroma blocks.
// t_nnz/l_nnz carry the top/left non-zero context across blocks.
730 static av_always_inline
731 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
732 uint8_t t_nnz[9], uint8_t l_nnz[9])
734 int i, x, y, luma_start = 0, luma_ctx = 3;
735 int nnz_pred, nnz, nnz_total = 0;
736 int segment = s->segment;
// Modes with a separate Y2 block: everything except I4x4 and SPLIT.
739 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
740 nnz_pred = t_nnz[8] + l_nnz[8];
742 // decode DC values and do hadamard
743 nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
744 s->qmat[segment].luma_dc_qmul);
745 l_nnz[8] = t_nnz[8] = !!nnz;
// DC-only fast path vs full inverse WHT (selection condition elided).
750 s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
752 s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
// When Y2 is present, luma AC blocks start at coeff 1 with context 0
// (the luma_start/luma_ctx updates are elided in this chunk).
759 for (y = 0; y < 4; y++)
760 for (x = 0; x < 4; x++) {
761 nnz_pred = l_nnz[y] + t_nnz[x];
762 nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
763 nnz_pred, s->qmat[segment].luma_qmul);
764 // nnz+block_dc may be one more than the actual last index, but we don't care
765 s->non_zero_count_cache[y][x] = nnz + block_dc;
766 t_nnz[x] = l_nnz[y] = !!nnz;
771 // TODO: what to do about dimensions? 2nd dim for luma is x,
772 // but for chroma it's (y<<1)|x
773 for (i = 4; i < 6; i++)
774 for (y = 0; y < 2; y++)
775 for (x = 0; x < 2; x++) {
776 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
777 nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
778 nnz_pred, s->qmat[segment].chroma_qmul);
779 s->non_zero_count_cache[i][(y<<1)+x] = nnz;
780 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
784 // if there were no coded coeffs despite the macroblock not being marked skip,
785 // we MUST not do the inner loop filter and should not do IDCT
786 // Since skip isn't used for bitstream prediction, just manually set it.
// Save the bottom row of this MB (16 luma + 8+8 chroma pixels) into the
// top_border scratch line for use as the top edge of the MB below.
// Chroma is skipped in simple-filter mode (condition elided in this chunk).
791 static av_always_inline
792 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
793 int linesize, int uvlinesize, int simple)
795 AV_COPY128(top_border, src_y + 15*linesize);
797 AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
798 AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
// Exchange (xchg=1) or copy back (xchg=0) the saved top-border pixels with
// the rows directly above the current MB, so intra prediction can read a
// consistent top edge even after the deblocking filter has run.
802 static av_always_inline
803 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
804 int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
805 int simple, int xchg)
807 uint8_t *top_border_m1 = top_border-32; // for TL prediction
809 src_cb -= uvlinesize;
810 src_cr -= uvlinesize;
812 #define XCHG(a,b,xchg) do { \
813 if (xchg) AV_SWAP64(b,a); \
814 else AV_COPY64(b,a); \
// Third XCHG argument 1 = always swap (these spans are never needed back).
817 XCHG(top_border_m1+8, src_y-8, xchg);
818 XCHG(top_border, src_y, xchg);
819 XCHG(top_border+8, src_y+8, 1);
// Top-right edge comes from the next MB's border slot, if one exists.
820 if (mb_x < mb_width-1)
821 XCHG(top_border+32, src_y+16, 1);
823 // only copy chroma for normal loop filter
824 // or to initialize the top row to 127
825 if (!simple || !mb_y) {
826 XCHG(top_border_m1+16, src_cb-8, xchg);
827 XCHG(top_border_m1+24, src_cr-8, xchg);
828 XCHG(top_border+16, src_cb, 1);
829 XCHG(top_border+24, src_cr, 1);
// Map DC_PRED8x8 to the edge-aware variant when top/left neighbors are
// unavailable (frame borders).
833 static av_always_inline
834 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
837 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
839 return mb_y ? mode : LEFT_DC_PRED8x8;
// Map TM (PLANE) 8x8 prediction to a fallback when the required top/left
// edges are missing at the frame border.
843 static av_always_inline
844 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
847 return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
849 return mb_y ? mode : HOR_PRED8x8;
// Fix up an 8x8/16x16 intra mode for frame borders (non-emu-edge path):
// only DC needs adjustment here; other cases are elided in this chunk.
853 static av_always_inline
854 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
856 if (mode == DC_PRED8x8) {
857 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
// Fix up an 8x8/16x16 intra mode for frame borders when the decoder runs
// with CODEC_FLAG_EMU_EDGE (no padded frame edges to read from).
863 static av_always_inline
864 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
868 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
// VERT/HOR cases: substitute the spec's 127/129 constant-DC predictors.
870 return !mb_y ? DC_127_PRED8x8 : mode;
872 return !mb_x ? DC_129_PRED8x8 : mode;
873 case PLANE_PRED8x8 /*TM*/:
874 return check_tm_pred8x8_mode(mode, mb_x, mb_y);
// 4x4 analogue of check_tm_pred8x8_mode: TM fallback at frame borders.
879 static av_always_inline
880 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
883 return mb_y ? VERT_VP8_PRED : DC_129_PRED;
885 return mb_y ? mode : HOR_VP8_PRED;
// Fix up a 4x4 intra mode at frame borders under CODEC_FLAG_EMU_EDGE.
// *copy_buf is set (in elided case arms) when the predictor must run on a
// copied edge buffer instead of the frame.
889 static av_always_inline
890 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
899 case DIAG_DOWN_LEFT_PRED:
901 return !mb_y ? DC_127_PRED : mode;
909 return !mb_x ? DC_129_PRED : mode;
911 return check_tm_pred4x4_mode(mode, mb_x, mb_y);
912 case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
913 case DIAG_DOWN_RIGHT_PRED:
914 case VERT_RIGHT_PRED:
// Perform intra prediction for one macroblock (luma 16x16 or 16 4x4
// sub-blocks, plus chroma 8x8), adding the decoded residual for I4x4.
// Uses xchg_mb_border to present a pre-deblock top edge to the predictors.
923 static av_always_inline
924 void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
927 AVCodecContext *avctx = s->avctx;
931 // for the first row, we need to run xchg_mb_border to init the top edge to 127
932 // otherwise, skip it if we aren't going to deblock
933 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
934 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
935 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
936 s->filter.simple, 1);
938 if (mb->mode < MODE_I4x4) {
939 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
940 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
942 mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
944 s->hpc.pred16x16[mode](dst[0], s->linesize);
// I4x4 path: predict and reconstruct each 4x4 block in raster order.
946 uint8_t *ptr = dst[0];
947 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
948 uint8_t tr_top[4] = { 127, 127, 127, 127 };
950 // all blocks on the right edge of the macroblock use bottom edge
951 // the top macroblock for their topright edge
952 uint8_t *tr_right = ptr - s->linesize + 16;
954 // if we're on the right edge of the frame, said edge is extended
955 // from the top macroblock
956 if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
957 mb_x == s->mb_width-1) {
958 tr = tr_right[-1]*0x01010101u;
959 tr_right = (uint8_t *)&tr;
963 AV_ZERO128(s->non_zero_count_cache);
965 for (y = 0; y < 4; y++) {
966 uint8_t *topright = ptr + 4 - s->linesize;
967 for (x = 0; x < 4; x++) {
968 int copy = 0, linesize = s->linesize;
969 uint8_t *dst = ptr+4*x;
// Scratch area (5 rows of 8) used when the predictor must read
// edges that don't exist in the frame buffer.
970 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
972 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
977 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
978 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
// Build the copy buffer: top row (127s or real pixels), top-left
// corner, and the left column from the frame.
984 AV_WN32A(copy_dst+4, 127U * 0x01010101U);
986 AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
990 copy_dst[3] = ptr[4*x-s->linesize-1];
999 copy_dst[11] = ptr[4*x -1];
1000 copy_dst[19] = ptr[4*x+s->linesize -1];
1001 copy_dst[27] = ptr[4*x+s->linesize*2-1];
1002 copy_dst[35] = ptr[4*x+s->linesize*3-1];
1008 s->hpc.pred4x4[mode](dst, topright, linesize);
// If we predicted into copy_dst, copy the 4 result rows back.
1010 AV_COPY32(ptr+4*x , copy_dst+12);
1011 AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
1012 AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
1013 AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
// Add residual: DC-only fast path vs full IDCT.
1016 nnz = s->non_zero_count_cache[y][x];
1019 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
1021 s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
1026 ptr += 4*s->linesize;
// Chroma 8x8 prediction for both planes with the shared mode.
1031 if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1032 mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode, mb_x, mb_y);
1034 mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x, mb_y);
1036 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1037 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
// Restore the borders swapped in at the top of this function.
1039 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
1040 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1041 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1042 s->filter.simple, 0);
// Per-subpel-phase edge requirements for the MC filters, indexed by the
// 3-bit fractional MV component (row 0 doubles as the mc_func index).
1045 static const uint8_t subpel_idx[3][8] = {
1046 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1047 // also function pointer index
1048 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1049 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1055 * @param s VP8 decoding context
1056 * @param dst target buffer for block data at block position
1057 * @param ref reference picture buffer at origin (0, 0)
1058 * @param mv motion vector (relative to block position) to get pixel data from
1059 * @param x_off horizontal position of block from origin (0, 0)
1060 * @param y_off vertical position of block from origin (0, 0)
1061 * @param block_w width of block (16, 8 or 4)
1062 * @param block_h height of block (always same as block_w)
1063 * @param width width of src/dst plane data
1064 * @param height height of src/dst plane data
1065 * @param linesize size of a single line of plane data, including padding
1066 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1068 static av_always_inline
1069 void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv,
1070 int x_off, int y_off, int block_w, int block_h,
1071 int width, int height, int linesize,
1072 vp8_mc_func mc_func[3][3])
1074 uint8_t *src = ref->data[0];
// Luma MVs are in 1/4-pel units; <<1 converts to the 1/8-pel phase used
// to index subpel_idx / the filter table.
1078 int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1079 int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
1081 x_off += mv->x >> 2;
1082 y_off += mv->y >> 2;
// Frame-threading: wait until the reference has decoded the needed rows.
1085 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1086 src += y_off * linesize + x_off;
// If the filter footprint leaves the plane, run MC from the emulated-edge
// buffer instead.
1087 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1088 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1089 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
1090 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1091 x_off - mx_idx, y_off - my_idx, width, height);
1092 src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1094 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
// Full-pel fast path (enclosing condition elided): plain copy.
1096 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1097 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
1102 * chroma MC function
1104 * @param s VP8 decoding context
1105 * @param dst1 target buffer for block data at block position (U plane)
1106 * @param dst2 target buffer for block data at block position (V plane)
1107 * @param ref reference picture buffer at origin (0, 0)
1108 * @param mv motion vector (relative to block position) to get pixel data from
1109 * @param x_off horizontal position of block from origin (0, 0)
1110 * @param y_off vertical position of block from origin (0, 0)
1111 * @param block_w width of block (16, 8 or 4)
1112 * @param block_h height of block (always same as block_w)
1113 * @param width width of src/dst plane data
1114 * @param height height of src/dst plane data
1115 * @param linesize size of a single line of plane data, including padding
1116 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1118 static av_always_inline
1119 void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref,
1120 const VP56mv *mv, int x_off, int y_off,
1121 int block_w, int block_h, int width, int height, int linesize,
1122 vp8_mc_func mc_func[3][3])
1124 uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
// Chroma MVs are already in 1/8-pel units, so no <<1 here (cf. luma).
1127 int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1128 int my = mv->y&7, my_idx = subpel_idx[0][my];
1130 x_off += mv->x >> 3;
1131 y_off += mv->y >> 3;
1134 src1 += y_off * linesize + x_off;
1135 src2 += y_off * linesize + x_off;
1136 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
// Edge emulation must be done separately for each chroma plane since
// they share the single edge_emu_buffer.
1137 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1138 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1139 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
1140 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1141 x_off - mx_idx, y_off - my_idx, width, height);
1142 src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1143 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1145 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
1146 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1147 x_off - mx_idx, y_off - my_idx, width, height);
1148 src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1149 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1151 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1152 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
// Full-pel fast path (enclosing condition elided): plain copies.
1155 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1156 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1157 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
/*
 * Motion-compensate one partition of a macroblock: luma block of size
 * block_w x block_h at offset (bx_off, by_off) inside the MB, plus the
 * corresponding half-size chroma block, using the MV *mv.
 * NOTE(review): the lines deriving uvmv from *mv (including the profile-3
 * special case body) are missing from this extract.
 */
1161 static av_always_inline
1162 void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
1163 AVFrame *ref_frame, int x_off, int y_off,
1164 int bx_off, int by_off,
1165 int block_w, int block_h,
1166 int width, int height, VP56mv *mv)
/* luma: the put_pixels table is indexed by block size (16 vs 8) */
1171 vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
1172 ref_frame, mv, x_off + bx_off, y_off + by_off,
1173 block_w, block_h, width, height, s->linesize,
1174 s->put_pixels_tab[block_w == 8]);
/* profile 3: chroma MV handling differs — body lost in extract; presumably
 * the fractional chroma MV bits are cleared (full-pel chroma) — TODO confirm */
1177 if (s->profile == 3) {
/* chroma planes are subsampled 2x in both dimensions */
1181 x_off >>= 1; y_off >>= 1;
1182 bx_off >>= 1; by_off >>= 1;
1183 width >>= 1; height >>= 1;
1184 block_w >>= 1; block_h >>= 1;
1185 vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
1186 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1187 &uvmv, x_off + bx_off, y_off + by_off,
1188 block_w, block_h, width, height, s->uvlinesize,
1189 s->put_pixels_tab[1 + (block_w == 4)]);
1192 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1193 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
/*
 * Issue cache prefetches into the reference frame `ref` for the estimated
 * position this MB's MV points at, to hide memory latency for upcoming MC.
 * Purely a performance hint — a wrong prefetch cannot corrupt output.
 */
1194 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1196 /* Don't prefetch refs that haven't been used very often this frame. */
1197 if (s->ref_count[ref-1] > (mb_xy >> 5)) {
1198 int x_off = mb_x << 4, y_off = mb_y << 4;
/* full-pel estimate of the target position (MV is 1/4-pel here: >>2) */
1199 int mx = (mb->mv.x>>2) + x_off + 8;
1200 int my = (mb->mv.y>>2) + y_off;
1201 uint8_t **src= s->framep[ref]->data;
/* stagger by (mb_x&3)*4 rows so consecutive MBs touch different lines */
1202 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1203 /* For threading, a ff_thread_await_progress here might be useful, but
1204 * it actually slows down the decoder. Since a bad prefetch doesn't
1205 * generate bad decoder output, we don't run it here. */
1206 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* chroma at half resolution; src[2]-src[1] is the U→V plane distance */
1207 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1208 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1213 * Apply motion vectors to prediction buffer, chapter 18.
/*
 * Inter prediction for one macroblock (VP8 spec chapter 18): dispatch on the
 * MB's partitioning mode and motion-compensate each partition from the MB's
 * reference frame into dst[].
 * NOTE(review): `break;` statements between switch cases and some braces are
 * missing from this extract; the case bodies themselves are unchanged.
 */
1215 static av_always_inline
1216 void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
1219 int x_off = mb_x << 4, y_off = mb_y << 4;
1220 int width = 16*s->mb_width, height = 16*s->mb_height;
1221 AVFrame *ref = s->framep[mb->ref_frame];
1222 VP56mv *bmv = mb->bmv;
1224 switch (mb->partitioning) {
/* whole MB, single MV */
1225 case VP8_SPLITMVMODE_NONE:
1226 vp8_mc_part(s, dst, ref, x_off, y_off,
1227 0, 0, 16, 16, width, height, &mb->mv);
/* sixteen 4x4 luma blocks, one MV each */
1229 case VP8_SPLITMVMODE_4x4: {
1234 for (y = 0; y < 4; y++) {
1235 for (x = 0; x < 4; x++) {
1236 vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
1238 4*x + x_off, 4*y + y_off, 4, 4,
1239 width, height, s->linesize,
1240 s->put_pixels_tab[2]);
/* chroma: one MV per 4x4 chroma block, averaged (with rounding) from
 * the four co-located luma MVs */
1245 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1246 for (y = 0; y < 2; y++) {
1247 for (x = 0; x < 2; x++) {
1248 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1249 mb->bmv[ 2*y * 4 + 2*x+1].x +
1250 mb->bmv[(2*y+1) * 4 + 2*x ].x +
1251 mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1252 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1253 mb->bmv[ 2*y * 4 + 2*x+1].y +
1254 mb->bmv[(2*y+1) * 4 + 2*x ].y +
1255 mb->bmv[(2*y+1) * 4 + 2*x+1].y;
/* round-to-nearest /4 that also works for negative sums:
 * (x >> (INT_BIT-1)) adds -1 for negative x before the shift */
1256 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1257 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
/* profile 3 chroma special case — body lost in this extract */
1258 if (s->profile == 3) {
1262 vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
1263 dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1264 4*x + x_off, 4*y + y_off, 4, 4,
1265 width, height, s->uvlinesize,
1266 s->put_pixels_tab[2]);
/* two 16x8 halves (top/bottom), one MV each */
1271 case VP8_SPLITMVMODE_16x8:
1272 vp8_mc_part(s, dst, ref, x_off, y_off,
1273 0, 0, 16, 8, width, height, &bmv[0]);
1274 vp8_mc_part(s, dst, ref, x_off, y_off,
1275 0, 8, 16, 8, width, height, &bmv[1]);
/* two 8x16 halves (left/right) */
1277 case VP8_SPLITMVMODE_8x16:
1278 vp8_mc_part(s, dst, ref, x_off, y_off,
1279 0, 0, 8, 16, width, height, &bmv[0]);
1280 vp8_mc_part(s, dst, ref, x_off, y_off,
1281 8, 0, 8, 16, width, height, &bmv[1]);
/* four 8x8 quadrants */
1283 case VP8_SPLITMVMODE_8x8:
1284 vp8_mc_part(s, dst, ref, x_off, y_off,
1285 0, 0, 8, 8, width, height, &bmv[0]);
1286 vp8_mc_part(s, dst, ref, x_off, y_off,
1287 8, 0, 8, 8, width, height, &bmv[1]);
1288 vp8_mc_part(s, dst, ref, x_off, y_off,
1289 0, 8, 8, 8, width, height, &bmv[2]);
1290 vp8_mc_part(s, dst, ref, x_off, y_off,
1291 8, 8, 8, 8, width, height, &bmv[3]);
/*
 * Add the inverse-transformed residual of one macroblock onto the prediction
 * in dst[].  non_zero_count_cache packs per-4x4-block nnz as bytes; nnz==1
 * means DC-only (cheap idct_dc_add), nnz>1 means full IDCT, and an all-
 * {0,1} row (nnz4 & ~0x01010101 == 0) can use the batched dc_add4 helpers.
 * NOTE(review): several structural lines (else branches, loop-exit checks,
 * shifts of nnz4) are missing from this extract.
 */
1296 static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
/* luma: MODE_I4x4 adds its residual during intra prediction, so skip here */
1300 if (mb->mode != MODE_I4x4) {
1301 uint8_t *y_dst = dst[0];
1302 for (y = 0; y < 4; y++) {
/* 4 nnz bytes for this row of 4x4 blocks, read as one little-endian word */
1303 uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
1305 if (nnz4&~0x01010101) {
/* at least one block in the row has AC coefficients: per-block dispatch */
1306 for (x = 0; x < 4; x++) {
1307 if ((uint8_t)nnz4 == 1)
1308 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
1309 else if((uint8_t)nnz4 > 1)
1310 s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
/* whole row is DC-only: batched 4-block DC add */
1316 s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
1319 y_dst += 4*s->linesize;
/* chroma: same scheme per plane, 2x2 blocks of 4x4 */
1323 for (ch = 0; ch < 2; ch++) {
1324 uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
1326 uint8_t *ch_dst = dst[1+ch];
1327 if (nnz4&~0x01010101) {
1328 for (y = 0; y < 2; y++) {
1329 for (x = 0; x < 2; x++) {
1330 if ((uint8_t)nnz4 == 1)
1331 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
1332 else if((uint8_t)nnz4 > 1)
1333 s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
1336 goto chroma_idct_end;
1338 ch_dst += 4*s->uvlinesize;
/* all four chroma blocks DC-only: batched add */
1341 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
/*
 * Compute the loop-filter strength for one macroblock and store it in *f,
 * combining the frame-level filter level with per-segment and per-mode/ref
 * deltas (VP8 spec section 15.1 / 15.2).
 */
1348 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
1350 int interior_limit, filter_level;
/* segment-level filter value: absolute, or a delta on the frame level */
1352 if (s->segmentation.enabled) {
1353 filter_level = s->segmentation.filter_level[s->segment];
1354 if (!s->segmentation.absolute_vals)
1355 filter_level += s->filter.level;
1357 filter_level = s->filter.level;
/* per-reference-frame and per-prediction-mode loop filter deltas */
1359 if (s->lf_delta.enabled) {
1360 filter_level += s->lf_delta.ref[mb->ref_frame];
1361 filter_level += s->lf_delta.mode[mb->mode];
/* clamp to the 6-bit range [0, 63] used by the filter */
1364 filter_level = av_clip_uintp2(filter_level, 6);
/* interior (inner-edge) limit, attenuated by the sharpness setting */
1366 interior_limit = filter_level;
1367 if (s->filter.sharpness) {
1368 interior_limit >>= (s->filter.sharpness + 3) >> 2;
1369 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1371 interior_limit = FFMAX(interior_limit, 1);
1373 f->filter_level = filter_level;
1374 f->inner_limit = interior_limit;
/* inner (sub-block) edges are filtered unless the MB was skipped and is a
 * whole-MB prediction (no 4x4 intra, no split MV) */
1375 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
/*
 * Apply the normal (non-simple) loop filter to one macroblock: horizontal
 * filtering across the left MB edge and interior columns, then vertical
 * filtering across the top MB edge and interior rows, for luma and chroma.
 * NOTE(review): the early-out for filter_level == 0 and the mb_x/mb_y edge
 * guards around the mbedge calls are missing from this extract.
 */
1378 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
1380 int mbedge_lim, bedge_lim, hev_thresh;
1381 int filter_level = f->filter_level;
1382 int inner_limit = f->inner_limit;
1383 int inner_filter = f->inner_filter;
1384 int linesize = s->linesize;
1385 int uvlinesize = s->uvlinesize;
/* high-edge-variance threshold LUT, indexed [keyframe][filter_level] */
1386 static const uint8_t hev_thresh_lut[2][64] = {
1387 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1388 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1389 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1391 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1392 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1393 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* edge limits: MB edges get a slightly higher threshold than inner edges */
1400 bedge_lim = 2*filter_level + inner_limit;
1401 mbedge_lim = bedge_lim + 4;
1403 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
/* horizontal filter across the left macroblock edge */
1406 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
1407 mbedge_lim, inner_limit, hev_thresh);
1408 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
1409 mbedge_lim, inner_limit, hev_thresh);
/* horizontal filter on the three interior 4-pixel columns */
1413 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
1414 inner_limit, hev_thresh);
1415 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
1416 inner_limit, hev_thresh);
1417 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
1418 inner_limit, hev_thresh);
1419 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
1420 uvlinesize, bedge_lim,
1421 inner_limit, hev_thresh);
/* vertical filter across the top macroblock edge */
1425 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
1426 mbedge_lim, inner_limit, hev_thresh);
1427 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
1428 mbedge_lim, inner_limit, hev_thresh);
/* vertical filter on the three interior 4-pixel rows */
1432 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
1433 linesize, bedge_lim,
1434 inner_limit, hev_thresh);
1435 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
1436 linesize, bedge_lim,
1437 inner_limit, hev_thresh);
1438 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
1439 linesize, bedge_lim,
1440 inner_limit, hev_thresh);
1441 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
1442 dst[2] + 4 * uvlinesize,
1443 uvlinesize, bedge_lim,
1444 inner_limit, hev_thresh);
/*
 * Apply the "simple" loop filter to one macroblock: luma only, no high-edge-
 * variance logic — just the MB-edge and interior-edge filters at the limits
 * derived from the filter level.
 * NOTE(review): the filter_level == 0 early-out and the mb_x/mb_y edge
 * guards are missing from this extract.
 */
1448 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
1450 int mbedge_lim, bedge_lim;
1451 int filter_level = f->filter_level;
1452 int inner_limit = f->inner_limit;
1453 int inner_filter = f->inner_filter;
1454 int linesize = s->linesize;
/* MB edges use a slightly higher threshold than interior edges */
1459 bedge_lim = 2*filter_level + inner_limit;
1460 mbedge_lim = bedge_lim + 4;
/* left MB edge, then the three interior columns */
1463 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
1465 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1466 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1467 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
/* top MB edge, then the three interior rows */
1471 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
1473 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1474 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1475 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
/*
 * Loop-filter one row of macroblocks with the normal filter, using the
 * per-MB strengths computed earlier into s->filter_strength.  The pre-filter
 * top border of each MB is saved for intra prediction of the row below.
 * NOTE(review): the dst[] declaration line and the per-MB dst advance at the
 * loop bottom are missing from this extract.
 */
1479 static void filter_mb_row(VP8Context *s, AVFrame *curframe, int mb_y)
1481 VP8FilterStrength *f = s->filter_strength;
1483 curframe->data[0] + 16*mb_y*s->linesize,
1484 curframe->data[1] + 8*mb_y*s->uvlinesize,
1485 curframe->data[2] + 8*mb_y*s->uvlinesize
1489 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
/* save unfiltered top border first: intra prediction of the next row must
 * see pre-loop-filter pixels */
1490 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1491 filter_mb(s, dst, f++, mb_x, mb_y);
/*
 * Loop-filter one row of macroblocks with the simple (luma-only) filter.
 * Counterpart of filter_mb_row for s->filter.simple streams.
 * NOTE(review): the per-MB dst advance at the loop bottom is missing from
 * this extract.
 */
1498 static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y)
1500 VP8FilterStrength *f = s->filter_strength;
1501 uint8_t *dst = curframe->data[0] + 16*mb_y*s->linesize;
1504 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
/* save unfiltered top border for intra prediction of the row below */
1505 backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
1506 filter_mb_simple(s, dst, f++, mb_x, mb_y);
/*
 * Decode one VP8 frame from avpkt into an AVFrame returned via *data.
 * Outline: parse the frame header, pick/allocate a frame buffer, set up the
 * next reference-frame pointers (and publish them via ff_thread_finish_setup
 * for frame threading), then decode macroblock rows — mode parsing, intra or
 * inter prediction, IDCT, loop filtering — reporting per-row progress.
 * NOTE(review): numerous structural lines (braces, else branches, returns,
 * the dst[] declaration inside the row loop) are missing from this extract.
 */
1511 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
1514 VP8Context *s = avctx->priv_data;
1515 int ret, mb_x, mb_y, i, y, referenced;
1516 enum AVDiscard skip_thresh;
1517 AVFrame *av_uninit(curframe), *prev_frame = s->framep[VP56_FRAME_CURRENT];
1519 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
/* a frame is "referenced" if it updates last/golden/altref */
1522 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
1523 || s->update_altref == VP56_FRAME_CURRENT;
/* map reference status onto the AVDiscard scale for skip_frame handling */
1525 skip_thresh = !referenced ? AVDISCARD_NONREF :
1526 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
1528 if (avctx->skip_frame >= skip_thresh) {
1532 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
1534 // release no longer referenced frames
1535 for (i = 0; i < 5; i++)
1536 if (s->frames[i].data[0] &&
1537 &s->frames[i] != prev_frame &&
1538 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1539 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1540 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1541 ff_thread_release_buffer(avctx, &s->frames[i]);
1543 // find a free buffer
1544 for (i = 0; i < 5; i++)
1545 if (&s->frames[i] != prev_frame &&
1546 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1547 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1548 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1549 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
/* 5 frame slots vs 4 live references: one must always be free */
1553 av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
1556 if (curframe->data[0])
1557 ff_thread_release_buffer(avctx, curframe);
1559 curframe->key_frame = s->keyframe;
1560 curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1561 curframe->reference = referenced ? 3 : 0;
/* piggy-back the segmentation map on ref_index so the next frame's thread
 * can read it for segment prediction */
1562 curframe->ref_index[0] = s->segmentation_map;
1563 if ((ret = ff_thread_get_buffer(avctx, curframe))) {
1564 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
1568 // check if golden and altref are swapped
1569 if (s->update_altref != VP56_FRAME_NONE) {
1570 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
1572 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
1574 if (s->update_golden != VP56_FRAME_NONE) {
1575 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
1577 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
1579 if (s->update_last) {
1580 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
1582 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
1584 s->next_framep[VP56_FRAME_CURRENT] = curframe;
/* frame threading: reference setup is done, the next frame may start */
1586 ff_thread_finish_setup(avctx);
1588 // Given that arithmetic probabilities are updated every frame, it's quite likely
1589 // that the values we have on a random interframe are complete junk if we didn't
1590 // start decode on a keyframe. So just don't display anything rather than junk.
1591 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1592 !s->framep[VP56_FRAME_GOLDEN] ||
1593 !s->framep[VP56_FRAME_GOLDEN2])) {
1594 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1595 return AVERROR_INVALIDDATA;
1598 s->linesize = curframe->linesize[0];
1599 s->uvlinesize = curframe->linesize[1];
/* lazily sized here because linesize is only known after get_buffer */
1601 if (!s->edge_emu_buffer)
1602 s->edge_emu_buffer = av_malloc(21*s->linesize);
1604 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1606 /* Zero macroblock structures for top/top-left prediction from outside the frame. */
1607 memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
1609 // top edge of 127 for intra prediction
1610 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1611 s->top_border[0][15] = s->top_border[0][23] = 127;
1612 memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
1614 memset(s->ref_count, 0, sizeof(s->ref_count));
1616 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
/* MV clamping margin: 16 pixels in 1/4-pel units */
1618 #define MARGIN (16 << 2)
1619 s->mv_min.y = -MARGIN;
1620 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1622 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
/* coefficient partitions are assigned to MB rows round-robin */
1623 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
/* macroblock rows are stored bottom-up in macroblocks_base */
1624 VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1625 int mb_xy = mb_y*s->mb_width;
1627 curframe->data[0] + 16*mb_y*s->linesize,
1628 curframe->data[1] + 8*mb_y*s->uvlinesize,
1629 curframe->data[2] + 8*mb_y*s->uvlinesize
1632 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
1633 memset(s->left_nnz, 0, sizeof(s->left_nnz));
1634 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1636 // left edge of 129 for intra prediction
1637 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1638 for (i = 0; i < 3; i++)
1639 for (y = 0; y < 16>>!!i; y++)
1640 dst[i][y*curframe->linesize[i]-1] = 129;
1641 if (mb_y == 1) // top left edge is also 129
1642 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
1645 s->mv_min.x = -MARGIN;
1646 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
/* segment prediction reads the previous frame's map: wait for that row */
1647 if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
1648 ff_thread_await_progress(prev_frame, mb_y, 0);
1650 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1651 /* Prefetch the current frame, 4 MBs ahead */
1652 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1653 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1655 decode_mb_mode(s, mb, mb_x, mb_y, s->segmentation_map + mb_xy,
1656 prev_frame ? prev_frame->ref_index[0] + mb_xy : NULL);
1658 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
1661 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
1663 if (mb->mode <= MODE_I4x4)
1664 intra_predict(s, dst, mb, mb_x, mb_y);
1666 inter_predict(s, dst, mb, mb_x, mb_y);
1668 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
1671 idct_mb(s, dst, mb);
/* skipped MB: clear the nnz context instead of running the IDCT */
1673 AV_ZERO64(s->left_nnz);
1674 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
1676 // Reset DC block predictors if they would exist if the mb had coefficients
1677 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
1679 s->top_nnz[mb_x][8] = 0;
1683 if (s->deblock_filter)
1684 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);
1686 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
/* row finished: run the loop filter over it, then publish progress */
1694 if (s->deblock_filter) {
1695 if (s->filter.simple)
1696 filter_mb_row_simple(s, curframe, mb_y);
1698 filter_mb_row(s, curframe, mb_y);
1703 ff_thread_report_progress(curframe, mb_y, 0);
1706 ff_thread_report_progress(curframe, INT_MAX, 0);
1708 // if future frames don't use the updated probabilities,
1709 // reset them to the values we saved
1710 if (!s->update_probabilities)
1711 s->prob[0] = s->prob[1];
/* commit the new reference-frame pointer set */
1713 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
/* invisible (altref-only) frames are decoded but not output */
1715 if (!s->invisible) {
1716 *(AVFrame*)data = *curframe;
1717 *data_size = sizeof(AVFrame);
/*
 * One-time decoder initialization: set the fixed output pixel format and
 * initialize the DSP, H.264-shared intra prediction, and VP8 DSP contexts.
 */
1723 static av_cold int vp8_decode_init(AVCodecContext *avctx)
1725 VP8Context *s = avctx->priv_data;
/* VP8 is always 8-bit 4:2:0 */
1728 avctx->pix_fmt = PIX_FMT_YUV420P;
1730 dsputil_init(&s->dsp, avctx);
/* VP8 reuses the H.264 intra prediction functions */
1731 ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8, 1);
1732 ff_vp8dsp_init(&s->vp8dsp);
/*
 * Decoder teardown: releases all frame buffers (and, via the flush path,
 * the per-context allocations) before the context is destroyed.
 */
1737 static av_cold int vp8_decode_free(AVCodecContext *avctx)
1739 vp8_decode_flush(avctx);
/*
 * Frame-threading: initialize a per-thread copy of the decoder context.
 * Body not fully visible in this extract.
 */
1743 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
1745 VP8Context *s = avctx->priv_data;
/* Translate a frame pointer from the source context's frames[] array into
 * the equivalent slot of the destination context (NULL stays NULL). */
1752 #define REBASE(pic) \
1753 pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
/*
 * Frame-threading: copy the inter-frame decoder state (probabilities,
 * segmentation, loop-filter deltas, sign biases, reference frames) from the
 * previously-run thread's context into this one.
 */
1755 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
1757 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
/* dimensions changed between threads: per-size buffers must be rebuilt
 * (the reallocation lines are not visible in this extract) */
1759 if (s->macroblocks_base &&
1760 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
/* take the probabilities the source context would use for its next frame */
1764 s->prob[0] = s_src->prob[!s_src->update_probabilities];
1765 s->segmentation = s_src->segmentation;
1766 s->lf_delta = s_src->lf_delta;
1767 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
1769 memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
/* adopt the source's next_framep set, rebased into our own frames[] */
1770 s->framep[0] = REBASE(s_src->next_framep[0]);
1771 s->framep[1] = REBASE(s_src->next_framep[1]);
1772 s->framep[2] = REBASE(s_src->next_framep[2]);
1773 s->framep[3] = REBASE(s_src->next_framep[3]);
1778 AVCodec ff_vp8_decoder = {
1780 .type = AVMEDIA_TYPE_VIDEO,
1782 .priv_data_size = sizeof(VP8Context),
1783 .init = vp8_decode_init,
1784 .close = vp8_decode_free,
1785 .decode = vp8_decode_frame,
1786 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
1787 .flush = vp8_decode_flush,
1788 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
1789 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
1790 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),