2 * VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Jason Garrett-Glaser
8 * This file is part of FFmpeg.
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "libavutil/imgutils.h"
29 #include "rectangle.h"
/**
 * Release all reference frames and free per-row / per-frame scratch buffers.
 * Invoked on a dimension change (from update_dimensions) and at decoder close.
 * NOTE(review): this excerpt is missing lines (braces, declaration of 'i');
 * code is left byte-identical.
 */
36 static void vp8_decode_flush(AVCodecContext *avctx)
38 VP8Context *s = avctx->priv_data;
// Frame buffers are owned by the original decoder context, not by a
// frame-thread copy — only release them when this is not a copy.
41 if (!avctx->is_copy) {
42 for (i = 0; i < 5; i++)
43 if (s->frames[i].data[0])
44 ff_thread_release_buffer(avctx, &s->frames[i]);
// Clear the pointer table so no stale references to freed frames remain.
46 memset(s->framep, 0, sizeof(s->framep));
48 av_freep(&s->macroblocks_base);
49 av_freep(&s->filter_strength);
50 av_freep(&s->intra4x4_pred_mode_top);
51 av_freep(&s->top_nnz);
52 av_freep(&s->edge_emu_buffer);
53 av_freep(&s->top_border);
54 av_freep(&s->segmentation_map);
// macroblocks aliases macroblocks_base + offset (see update_dimensions);
// reset it alongside the freed base pointer.
56 s->macroblocks = NULL;
/**
 * (Re)allocate all per-frame working buffers for a new coded size.
 *
 * @param width  new frame width in pixels
 * @param height new frame height in pixels
 * @return 0 on success, AVERROR_INVALIDDATA for an invalid size,
 *         AVERROR(ENOMEM) if any allocation fails
 * NOTE(review): excerpt is missing lines (braces, trailing return);
 * code left byte-identical.
 */
59 static int update_dimensions(VP8Context *s, int width, int height)
61 if (width != s->avctx->width ||
62 height != s->avctx->height) {
63 if (av_image_check_size(width, height, 0, s->avctx))
64 return AVERROR_INVALIDDATA;
// Size changed: drop everything allocated for the old dimensions first.
66 vp8_decode_flush(s->avctx);
68 avcodec_set_dimensions(s->avctx, width, height);
// Macroblock grid: 16x16 pixels per MB, rounded up.
71 s->mb_width = (s->avctx->coded_width +15) / 16;
72 s->mb_height = (s->avctx->coded_height+15) / 16;
// mb_width + 2*mb_height + 1 entries: extra rows/columns provide the
// top / left / top-left edge macroblocks referenced during prediction.
74 s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
75 s->filter_strength = av_mallocz(s->mb_width*sizeof(*s->filter_strength));
// 4 intra4x4 top-prediction modes per macroblock column.
76 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
77 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
78 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
79 s->segmentation_map = av_mallocz(s->mb_width*s->mb_height);
// All-or-nothing check; a partial failure leaves the rest to be freed
// by the next flush.
81 if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
82 !s->top_nnz || !s->top_border || !s->segmentation_map)
83 return AVERROR(ENOMEM);
// Offset by one so index -1 (the left-edge macroblock) is addressable.
85 s->macroblocks = s->macroblocks_base + 1;
/**
 * Parse segmentation header fields from the frame header bitstream:
 * per-segment quantizer and loop-filter deltas, plus the segment-id
 * tree probabilities when the segment map is being updated.
 * NOTE(review): excerpt is missing lines (braces, 'int i;'); code left
 * byte-identical.
 */
90 static void parse_segment_info(VP8Context *s)
92 VP56RangeCoder *c = &s->c;
95 s->segmentation.update_map = vp8_rac_get(c);
97 if (vp8_rac_get(c)) { // update segment feature data
// absolute_vals: quant/filter values replace the frame defaults
// rather than being added to them (see use in get_quants).
98 s->segmentation.absolute_vals = vp8_rac_get(c);
100 for (i = 0; i < 4; i++)
101 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
103 for (i = 0; i < 4; i++)
104 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
// 255 is the "use default probability" sentinel for the segment-id tree.
106 if (s->segmentation.update_map)
107 for (i = 0; i < 3; i++)
108 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
/**
 * Read per-reference-frame and per-mode loop-filter level deltas.
 * NOTE(review): excerpt is missing lines (braces, 'int i;'); code left
 * byte-identical.
 */
111 static void update_lf_deltas(VP8Context *s)
113 VP56RangeCoder *c = &s->c;
116 for (i = 0; i < 4; i++)
117 s->lf_delta.ref[i] = vp8_rac_get_sint(c, 6);
// Mode deltas are indexed from MODE_I4x4 through VP8_MVMODE_SPLIT.
119 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++)
120 s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6);
/**
 * Set up the coefficient-partition range decoders.  The bitstream carries
 * 1, 2, 4 or 8 partitions; the sizes of all but the last are stored as
 * 24-bit little-endian values at the start of the data.
 *
 * @return 0 on success; NOTE(review): the visible failure path (what is
 * returned when buf_size - size < 0) is missing from this excerpt.
 * Braces and 'int i;' are also not visible; code left byte-identical.
 */
123 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
125 const uint8_t *sizes = buf;
128 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
// Skip past the partition-size table (3 bytes per partition, last
// partition's size is implicit).
130 buf += 3*(s->num_coeff_partitions-1);
131 buf_size -= 3*(s->num_coeff_partitions-1);
135 for (i = 0; i < s->num_coeff_partitions-1; i++) {
136 int size = AV_RL24(sizes + 3*i);
// Guard against a declared partition size exceeding the remaining data.
137 if (buf_size - size < 0)
140 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
// The final partition consumes whatever data remains.
144 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
/**
 * Read the frame quantizer indices and deltas, then fill the per-segment
 * dequantization tables (DC/AC multipliers for luma, luma DC (Y2) and
 * chroma) via the VP8 quantizer lookup tables.
 * NOTE(review): excerpt is missing lines (braces, 'int i;', declaration of
 * base_qi and the non-segmented / relative-delta branches); code left
 * byte-identical.
 */
149 static void get_quants(VP8Context *s)
151 VP56RangeCoder *c = &s->c;
// Frame-level base AC quantizer index plus five signed deltas.
154 int yac_qi = vp8_rac_get_uint(c, 7);
155 int ydc_delta = vp8_rac_get_sint(c, 4);
156 int y2dc_delta = vp8_rac_get_sint(c, 4);
157 int y2ac_delta = vp8_rac_get_sint(c, 4);
158 int uvdc_delta = vp8_rac_get_sint(c, 4);
159 int uvac_delta = vp8_rac_get_sint(c, 4);
161 for (i = 0; i < 4; i++) {
162 if (s->segmentation.enabled) {
163 base_qi = s->segmentation.base_quant[i];
// When absolute_vals is clear the segment value is a delta — the
// addition that applies it is not visible in this excerpt.
164 if (!s->segmentation.absolute_vals)
// Indices are clamped to [0,127] (av_clip_uintp2 with 7 bits) before
// the table lookups.
169 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
170 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)];
171 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
// Y2 AC multiplier is scaled by 155/100 per the VP8 spec.
172 s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100;
173 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
174 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
// Spec-mandated floors/ceilings on two of the multipliers.
176 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
177 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
182 * Determine which buffers golden and altref should be updated with after this frame.
183 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
185 * Intra frames update all 3 references
186 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
187 * If the update (golden|altref) flag is set, it's updated with the current frame
188 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
189 * If the flag is not set, the number read means:
191 * 1: VP56_FRAME_PREVIOUS
192 * 2: update golden with altref, or update altref with golden
/**
 * @param update flag: the reference is refreshed from the current frame
 * @param ref    which reference this decision is for (GOLDEN or GOLDEN2)
 * @return the VP56Frame the reference should be updated from
 * NOTE(review): braces and the case labels of the switch (and the keyframe
 * shortcut implied by the visible 'return VP56_FRAME_CURRENT;') are missing
 * from this excerpt; code left byte-identical.
 */
194 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
196 VP56RangeCoder *c = &s->c;
199 return VP56_FRAME_CURRENT;
201 switch (vp8_rac_get_uint(c, 2)) {
203 return VP56_FRAME_PREVIOUS;
// Cross-update: golden takes altref's content and vice versa.
205 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
207 return VP56_FRAME_NONE;
/**
 * Read the golden/altref update flags from the header and resolve them to
 * source frames via ref_to_update().
 * NOTE(review): braces missing from this excerpt; code left byte-identical.
 */
210 static void update_refs(VP8Context *s)
212 VP56RangeCoder *c = &s->c;
214 int update_golden = vp8_rac_get(c);
215 int update_altref = vp8_rac_get(c);
217 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
218 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
/**
 * Parse the complete VP8 frame header: uncompressed preamble (frame type,
 * profile, dimensions on keyframes), then the compressed header read via
 * the range coder (segmentation, loop filter, partitions, quantizers,
 * reference updates and all probability updates).
 *
 * @return 0 on success, a negative AVERROR on malformed input
 * NOTE(review): this excerpt is missing many lines (braces, keyframe
 * guards, else branches, final return); code left byte-identical.
 */
221 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
223 VP56RangeCoder *c = &s->c;
224 int header_size, hscale, vscale, i, j, k, l, m, ret;
225 int width = s->avctx->width;
226 int height = s->avctx->height;
// Uncompressed 3-byte frame tag: bit 0 = inverted keyframe flag,
// bits 1-3 = profile, bit 4 = show_frame, bits 5-23 = first-partition size.
228 s->keyframe = !(buf[0] & 1);
229 s->profile = (buf[0]>>1) & 7;
230 s->invisible = !(buf[0] & 0x10);
231 header_size = AV_RL24(buf) >> 5;
236 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
// Profile 0 uses the 6-tap subpel filters; the visible guard selecting
// between the two memcpy branches is missing from this excerpt.
239 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
240 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
241 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
// Keyframes carry a 7-byte start-code + dimensions block after the tag.
243 if (header_size > buf_size - 7*s->keyframe) {
244 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
245 return AVERROR_INVALIDDATA;
// Keyframe start code is the fixed bytes 9d 01 2a (read little-endian).
249 if (AV_RL24(buf) != 0x2a019d) {
250 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
251 return AVERROR_INVALIDDATA;
// 14-bit width/height; top 2 bits of the adjacent bytes are scale codes.
253 width = AV_RL16(buf+3) & 0x3fff;
254 height = AV_RL16(buf+5) & 0x3fff;
255 hscale = buf[4] >> 6;
256 vscale = buf[6] >> 6;
260 if (hscale || vscale)
261 av_log_missing_feature(s->avctx, "Upscaling", 1);
// Keyframe resets: all references refresh, default probabilities restored.
263 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
264 for (i = 0; i < 4; i++)
265 for (j = 0; j < 16; j++)
266 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
267 sizeof(s->prob->token[i][j]));
268 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
269 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
270 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
271 memset(&s->segmentation, 0, sizeof(s->segmentation));
274 if (!s->macroblocks_base || /* first frame */
275 width != s->avctx->width || height != s->avctx->height) {
// NOTE(review): operator-precedence bug — this parses as
// (ret = (update_dimensions(...) < 0)), i.e. ret receives the boolean
// comparison result, not the error code.  Intended form is
// ((ret = update_dimensions(s, width, height)) < 0).  Left unchanged
// in this comment-only pass; fix when the full function is available.
276 if ((ret = update_dimensions(s, width, height) < 0))
280 ff_vp56_init_range_decoder(c, buf, header_size);
282 buf_size -= header_size;
286 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
287 vp8_rac_get(c); // whether we can skip clamping in dsp functions
290 if ((s->segmentation.enabled = vp8_rac_get(c)))
291 parse_segment_info(s);
293 s->segmentation.update_map = 0; // FIXME: move this to some init function?
295 s->filter.simple = vp8_rac_get(c);
296 s->filter.level = vp8_rac_get_uint(c, 6);
297 s->filter.sharpness = vp8_rac_get_uint(c, 3);
299 if ((s->lf_delta.enabled = vp8_rac_get(c)))
303 if (setup_partitions(s, buf, buf_size)) {
304 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
305 return AVERROR_INVALIDDATA;
// Sign-bias flags control MV sign flipping when prediction crosses
// references with differing bias (see decode_mvs).
312 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
313 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
316 // if we aren't saving this frame's probabilities for future frames,
317 // make a copy of the current probabilities
318 if (!(s->update_probabilities = vp8_rac_get(c)))
319 s->prob[1] = s->prob[0];
321 s->update_last = s->keyframe || vp8_rac_get(c);
// Token probability updates (spec 13.4): each branchy bit decides
// whether an 8-bit replacement probability follows.
323 for (i = 0; i < 4; i++)
324 for (j = 0; j < 8; j++)
325 for (k = 0; k < 3; k++)
326 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
327 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
328 int prob = vp8_rac_get_uint(c, 8);
// One coefficient band maps to several scan positions; propagate
// the updated probability to all of them.
329 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
330 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
333 if ((s->mbskip_enabled = vp8_rac_get(c)))
334 s->prob->mbskip = vp8_rac_get_uint(c, 8);
// Inter-frame-only probabilities (intra/last/golden reference choice).
337 s->prob->intra = vp8_rac_get_uint(c, 8);
338 s->prob->last = vp8_rac_get_uint(c, 8);
339 s->prob->golden = vp8_rac_get_uint(c, 8);
342 for (i = 0; i < 4; i++)
343 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
345 for (i = 0; i < 3; i++)
346 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
348 // 17.2 MV probability update
349 for (i = 0; i < 2; i++)
350 for (j = 0; j < 19; j++)
351 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
352 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
/**
 * Clamp a motion vector to the valid range for the current macroblock
 * (s->mv_min / s->mv_max, maintained by the caller).
 * NOTE(review): braces missing from this excerpt; code left byte-identical.
 */
358 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
360 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
361 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
365 * Motion vector coding, 17.1.
/**
 * Decode one motion-vector component (x or y) using the probability
 * set p.  Large magnitudes are coded bitwise (low 3 bits then high bits
 * in descending order); small magnitudes use a tree.
 * @return the signed component value
 * NOTE(review): braces, 'int bit, x = 0;' and intermediate small-tree
 * lines are missing from this excerpt; code left byte-identical.
 */
367 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
371 if (vp56_rac_get_prob_branchy(c, p[0])) {
// Long vector: low 3 bits first, then bits 9..4 in descending order.
374 for (i = 0; i < 3; i++)
375 x += vp56_rac_get_prob(c, p[9 + i]) << i;
376 for (i = 9; i > 3; i--)
377 x += vp56_rac_get_prob(c, p[9 + i]) << i;
// Bit 3 is implicit unless all higher bits are zero.
378 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
// Short vector: walk the small-mv tree starting at p+2.
382 const uint8_t *ps = p+2;
383 bit = vp56_rac_get_prob(c, *ps);
386 bit = vp56_rac_get_prob(c, *ps);
389 x += vp56_rac_get_prob(c, *ps);
// p[1] is the sign probability; zero is never negated.
392 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
/**
 * Pick the sub-MV probability set based on whether the left and top
 * neighbouring sub-blocks have zero motion (the raw 32-bit MV words are
 * compared against 0 / each other by the missing guard lines).
 * NOTE(review): the conditions selecting between the three returns are
 * missing from this excerpt; code left byte-identical.
 */
395 static av_always_inline
396 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
399 return vp8_submv_prob[4-!!left];
401 return vp8_submv_prob[2];
402 return vp8_submv_prob[1-!!left];
406 * Split motion vector prediction, 16.4.
407 * @returns the number of motion vectors parsed (2, 4 or 16)
/**
 * Decode the partitioning mode of a split-MV macroblock and then one
 * motion vector per partition, each predicted from left/above neighbours.
 * NOTE(review): braces, loop/index declarations (n, num, k, part_idx) and
 * several neighbour-selection guard lines are missing from this excerpt;
 * code left byte-identical.
 */
409 static av_always_inline
410 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
// mb[2] / mb[-1] address the top and left neighbour macroblocks in the
// flat macroblock array (layout established in update_dimensions).
414 VP8Macroblock *top_mb = &mb[2];
415 VP8Macroblock *left_mb = &mb[-1];
416 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
417 *mbsplits_top = vp8_mbsplits[top_mb->partitioning],
418 *mbsplits_cur, *firstidx;
419 VP56mv *top_mv = top_mb->bmv;
420 VP56mv *left_mv = left_mb->bmv;
421 VP56mv *cur_mv = mb->bmv;
// Partition-mode tree: 16x8 / 8x16 / 8x8 / 4x4.
423 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
424 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
425 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
427 part_idx = VP8_SPLITMVMODE_8x8;
430 part_idx = VP8_SPLITMVMODE_4x4;
433 num = vp8_mbsplit_count[part_idx];
434 mbsplits_cur = vp8_mbsplits[part_idx],
435 firstidx = vp8_mbfirstidx[part_idx];
436 mb->partitioning = part_idx;
438 for (n = 0; n < num; n++) {
440 uint32_t left, above;
441 const uint8_t *submv_prob;
// Left neighbour: from the left MB for column 0, else from this MB.
444 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
446 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
// Above neighbour: from the top MB for row 0, else from this MB.
448 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
450 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
452 submv_prob = get_submv_prob(left, above);
// Sub-MV tree: NEW4x4 (read delta) / ZERO4x4 / TOP4x4 / LEFT4x4.
454 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
455 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
456 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
457 mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
458 mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
460 AV_ZERO32(&mb->bmv[n]);
463 AV_WN32A(&mb->bmv[n], above);
466 AV_WN32A(&mb->bmv[n], left);
/**
 * Inter macroblock mode/MV decoding (spec 16.2/16.3): survey the top,
 * left and top-left neighbour MVs, build the "nearest/near/zero" counts,
 * then read the MV mode (ZERO / NEAREST / NEAR / NEW / SPLIT) and the
 * resulting motion vector(s).
 * NOTE(review): braces, several declarations (near_mv, idx) and the
 * NEAREST/NEAR else-branch structure are missing from this excerpt;
 * code left byte-identical.
 */
473 static av_always_inline
474 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
476 VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
478 mb + 1 /* top-left */ };
479 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
480 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
482 int cur_sign_bias = s->sign_bias[mb->ref_frame];
483 int8_t *sign_bias = s->sign_bias;
485 uint8_t cnt[4] = { 0 };
486 VP56RangeCoder *c = &s->c;
488 AV_ZERO32(&near_mv[0]);
489 AV_ZERO32(&near_mv[1]);
491 /* Process MB on top, left and top-left */
492 #define MV_EDGE_CHECK(n)\
494 VP8Macroblock *edge = mb_edge[n];\
495 int edge_ref = edge->ref_frame;\
496 if (edge_ref != VP56_FRAME_CURRENT) {\
497 uint32_t mv = AV_RN32A(&edge->mv);\
499 if (cur_sign_bias != sign_bias[edge_ref]) {\
500 /* SWAR negate of the values in mv. */\
502 mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
504 if (!n || mv != AV_RN32A(&near_mv[idx]))\
505 AV_WN32A(&near_mv[++idx], mv);\
506 cnt[idx] += 1 + (n != 2);\
508 cnt[CNT_ZERO] += 1 + (n != 2);\
516 mb->partitioning = VP8_SPLITMVMODE_NONE;
517 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
518 mb->mode = VP8_MVMODE_MV;
520 /* If we have three distinct MVs, merge first and last if they're the same */
521 if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
522 cnt[CNT_NEAREST] += 1;
524 /* Swap near and nearest if necessary */
525 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
526 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
527 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
530 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
531 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
533 /* Choose the best mv out of 0,0 and the nearest mv */
534 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
// Repurpose cnt[CNT_SPLITMV] as the split-mode context: number of
// neighbouring macroblocks that themselves used SPLIT.
535 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
536 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
537 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
539 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
540 mb->mode = VP8_MVMODE_SPLIT;
// mb->mv becomes the last sub-MV parsed, used for neighbour prediction.
541 mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
543 mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
544 mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
548 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
552 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
556 mb->mode = VP8_MVMODE_ZERO;
/**
 * Decode the 16 intra 4x4 prediction modes for a macroblock.
 * Keyframes use context from the left/top neighbouring sub-block modes;
 * inter frames use a single fixed probability set.
 * NOTE(review): braces, declarations (x, y, i, ctx) and the
 * keyframe/else structure are partially missing; code left byte-identical.
 */
562 static av_always_inline
563 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
564 int mb_x, int keyframe)
566 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
569 uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x;
570 uint8_t* const left = s->intra4x4_pred_mode_left;
571 for (y = 0; y < 4; y++) {
572 for (x = 0; x < 4; x++) {
// Context = probability table indexed by the modes above and to the left.
574 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
575 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
// Record this mode as the neighbour context for subsequent blocks.
576 left[y] = top[x] = *intra4x4;
582 for (i = 0; i < 16; i++)
583 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
/**
 * Decode all per-macroblock header data: segment id, skip flag,
 * intra/inter decision, prediction modes and (for inter) the reference
 * frame and motion vectors.
 *
 * @param segment in/out: segment id (predicted from *ref when the map is
 *                not being updated)
 * @param ref     optional external segment-id prediction source
 * NOTE(review): braces and the keyframe/inter branch guards are partially
 * missing from this excerpt; code left byte-identical.
 */
587 static av_always_inline
588 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment, uint8_t *ref)
590 VP56RangeCoder *c = &s->c;
592 if (s->segmentation.update_map)
593 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
595 *segment = ref ? *ref : *segment;
596 s->segment = *segment;
598 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
// Keyframe path: fixed intra probabilities.
601 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
603 if (mb->mode == MODE_I4x4) {
604 decode_intra4x4_modes(s, c, mb_x, 1);
// For whole-MB modes, replicate the equivalent 4x4 mode into the
// neighbour-context arrays so later I4x4 blocks predict correctly.
606 const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
607 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
608 AV_WN32A(s->intra4x4_pred_mode_left, modes);
611 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
612 mb->ref_frame = VP56_FRAME_CURRENT;
// Inter macroblock: pick reference (previous / golden / altref).
613 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
615 if (vp56_rac_get_prob_branchy(c, s->prob->last))
616 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
617 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
619 mb->ref_frame = VP56_FRAME_PREVIOUS;
// ref_count is indexed from PREVIOUS (=1), hence the -1.
620 s->ref_count[mb->ref_frame-1]++;
622 // motion vectors, 16.3
623 decode_mvs(s, mb, mb_x, mb_y);
// Intra macroblock in an inter frame: updated probability tables.
626 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
628 if (mb->mode == MODE_I4x4)
629 decode_intra4x4_modes(s, c, mb_x, 0);
631 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
632 mb->ref_frame = VP56_FRAME_CURRENT;
633 mb->partitioning = VP8_SPLITMVMODE_NONE;
634 AV_ZERO32(&mb->bmv[0]);
// Guarded so an architecture-specific (e.g. assembly) implementation can
// replace this C version.
638 #ifndef decode_block_coeffs_internal
640 * @param c arithmetic bitstream reader context
641 * @param block destination for block coefficients
642 * @param probs probabilities to use when reading trees from the bitstream
643 * @param i initial coeff index, 0 unless a separate DC block is coded
644 * @param qmul array holding the dc/ac dequant factor at position 0/1
645 * @return 0 if no coeffs were decoded
646 * otherwise, the index of the last coeff decoded plus one
/*
 * NOTE(review): this excerpt is missing lines (braces, the coeff/loop
 * declarations, 'goto skip_eob' style control flow and the final return);
 * code left byte-identical.  Walks the DCT token tree per coefficient:
 * EOB / zero / one / small values / CAT1-6 escape codes.
 */
648 static int decode_block_coeffs_internal(VP56RangeCoder *c, DCTELEM block[16],
649 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
650 int i, uint8_t *token_prob, int16_t qmul[2])
655 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
659 if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
661 return i; // invalid input; blocks should end with EOB
// After a zero, the next token uses the "previous was zero" context (0).
662 token_prob = probs[i][0];
666 if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
668 token_prob = probs[i+1][1];
670 if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
671 coeff = vp56_rac_get_prob_branchy(c, token_prob[4]);
673 coeff += vp56_rac_get_prob(c, token_prob[5]);
677 if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
678 if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
679 coeff = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
682 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
683 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
685 } else { // DCT_CAT3 and up
686 int a = vp56_rac_get_prob(c, token_prob[8]);
687 int b = vp56_rac_get_prob(c, token_prob[9+a]);
688 int cat = (a<<1) + b;
// Category base values: 3 + 8<<cat, plus the category's extra bits.
689 coeff = 3 + (8<<cat);
690 coeff += vp8_rac_get_coeff(c, ff_vp8_dct_cat_prob[cat]);
// Nonzero coefficient: next token uses the ">1" context (2).
693 token_prob = probs[i+1][2];
// Sign bit, dequantize (DC factor for index 0, AC otherwise), store in
// zigzag order.
695 block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
703 * @param c arithmetic bitstream reader context
704 * @param block destination for block coefficients
705 * @param probs probabilities to use when reading trees from the bitstream
706 * @param i initial coeff index, 0 unless a separate DC block is coded
707 * @param zero_nhood the initial prediction context for number of surrounding
708 * all-zero blocks (only left/top, so 0-2)
709 * @param qmul array holding the dc/ac dequant factor at position 0/1
710 * @return 0 if no coeffs were decoded
711 * otherwise, the index of the last coeff decoded plus one
/* Fast wrapper: handles the common immediate-EOB case inline and defers
 * everything else to decode_block_coeffs_internal().
 * NOTE(review): braces and the EOB return are missing from this excerpt;
 * code left byte-identical. */
713 static av_always_inline
714 int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
715 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
716 int i, int zero_nhood, int16_t qmul[2])
718 uint8_t *token_prob = probs[i][zero_nhood];
719 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
721 return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
/**
 * Decode all residual coefficients for one macroblock: optional separate
 * luma DC (Y2/WHT) block, 16 luma 4x4 blocks, then 8 chroma 4x4 blocks.
 * Updates the top (t_nnz) and left (l_nnz) non-zero contexts in place.
 * NOTE(review): braces and several lines (block_dc handling, the
 * luma_start/luma_ctx adjustment after the DC block, the final skip
 * override) are missing from this excerpt; code left byte-identical.
 */
724 static av_always_inline
725 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
726 uint8_t t_nnz[9], uint8_t l_nnz[9])
728 int i, x, y, luma_start = 0, luma_ctx = 3;
729 int nnz_pred, nnz, nnz_total = 0;
730 int segment = s->segment;
// Whole-MB prediction modes code luma DC in a separate Y2 block.
733 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
734 nnz_pred = t_nnz[8] + l_nnz[8];
736 // decode DC values and do hadamard
737 nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
738 s->qmat[segment].luma_dc_qmul);
739 l_nnz[8] = t_nnz[8] = !!nnz;
// DC-only fast path vs full inverse Walsh-Hadamard transform.
744 s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
746 s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
// Luma AC (or full) blocks, raster order.
753 for (y = 0; y < 4; y++)
754 for (x = 0; x < 4; x++) {
755 nnz_pred = l_nnz[y] + t_nnz[x];
756 nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
757 nnz_pred, s->qmat[segment].luma_qmul);
758 // nnz+block_dc may be one more than the actual last index, but we don't care
759 s->non_zero_count_cache[y][x] = nnz + block_dc;
760 t_nnz[x] = l_nnz[y] = !!nnz;
765 // TODO: what to do about dimensions? 2nd dim for luma is x,
766 // but for chroma it's (y<<1)|x
767 for (i = 4; i < 6; i++)
768 for (y = 0; y < 2; y++)
769 for (x = 0; x < 2; x++) {
770 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
771 nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
772 nnz_pred, s->qmat[segment].chroma_qmul);
773 s->non_zero_count_cache[i][(y<<1)+x] = nnz;
774 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
778 // if there were no coded coeffs despite the macroblock not being marked skip,
779 // we MUST not do the inner loop filter and should not do IDCT
780 // Since skip isn't used for bitstream prediction, just manually set it.
/**
 * Save the bottom row of a reconstructed macroblock (16 luma + 8+8 chroma
 * pixels) into the top_border scratch line for use as the top edge of the
 * macroblock below.  Chroma is only saved for the non-simple loop filter
 * (the guard line is missing from this excerpt); code left byte-identical.
 */
785 static av_always_inline
786 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
787 int linesize, int uvlinesize, int simple)
789 AV_COPY128(top_border, src_y + 15*linesize);
791 AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
792 AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
/**
 * Swap (xchg=1) or copy (xchg=0) the row of pixels above the current
 * macroblock between the frame and the top_border scratch line.  Run
 * before intra prediction so prediction sees pre-loop-filter pixels, and
 * again afterwards to restore the filtered row.
 * NOTE(review): braces and the 'src_y -= linesize;' adjustment implied by
 * the chroma decrements are missing from this excerpt; code left
 * byte-identical.
 */
796 static av_always_inline
797 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
798 int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
799 int simple, int xchg)
// The previous macroblock's border slot supplies the top-left sample.
801 uint8_t *top_border_m1 = top_border-32; // for TL prediction
803 src_cb -= uvlinesize;
804 src_cr -= uvlinesize;
806 #define XCHG(a,b,xchg) do { \
807 if (xchg) AV_SWAP64(b,a); \
808 else AV_COPY64(b,a); \
// Third argument hard-coded to 1 for spans that are always swapped.
811 XCHG(top_border_m1+8, src_y-8, xchg);
812 XCHG(top_border, src_y, xchg);
813 XCHG(top_border+8, src_y+8, 1);
814 if (mb_x < mb_width-1)
815 XCHG(top_border+32, src_y+16, 1);
817 // only copy chroma for normal loop filter
818 // or to initialize the top row to 127
819 if (!simple || !mb_y) {
820 XCHG(top_border_m1+16, src_cb-8, xchg);
821 XCHG(top_border_m1+24, src_cr-8, xchg);
822 XCHG(top_border+16, src_cb, 1);
823 XCHG(top_border+24, src_cr, 1);
/**
 * Adjust a DC 8x8/16x16 prediction mode at frame edges where a neighbour
 * row/column is unavailable.  NOTE(review): the guard conditions (mb_x
 * checks) and braces are missing from this excerpt; code left
 * byte-identical.
 */
827 static av_always_inline
828 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
831 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
833 return mb_y ? mode : LEFT_DC_PRED8x8;
/**
 * Adjust a TrueMotion 8x8/16x16 prediction mode at frame edges: degrade
 * to vertical / horizontal / DC-129 when neighbours are missing.
 * NOTE(review): guard conditions and braces are missing from this
 * excerpt; code left byte-identical.
 */
837 static av_always_inline
838 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
841 return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
843 return mb_y ? mode : HOR_PRED8x8;
/**
 * Edge-fix an 8x8/16x16 intra mode for the normal (non-emu-edge) path.
 * Only DC prediction needs fixing here; other modes fall through via
 * lines missing from this excerpt.  Code left byte-identical.
 */
847 static av_always_inline
848 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
850 if (mode == DC_PRED8x8) {
851 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
/**
 * Edge-fix an 8x8/16x16 intra mode when CODEC_FLAG_EMU_EDGE is set (no
 * padded frame borders): DC, VERT, HOR and TM each get a per-edge
 * substitute.  NOTE(review): the switch statement and its case labels
 * are partially missing from this excerpt; code left byte-identical.
 */
857 static av_always_inline
858 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
862 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
864 return !mb_y ? DC_127_PRED8x8 : mode;
866 return !mb_x ? DC_129_PRED8x8 : mode;
867 case PLANE_PRED8x8 /*TM*/:
868 return check_tm_pred8x8_mode(mode, mb_x, mb_y);
/**
 * 4x4 analogue of check_tm_pred8x8_mode: degrade TrueMotion to
 * vertical / horizontal / DC-129 at frame edges.
 * NOTE(review): guard conditions and braces are missing from this
 * excerpt; code left byte-identical.
 */
873 static av_always_inline
874 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
877 return mb_y ? VERT_VP8_PRED : DC_129_PRED;
879 return mb_y ? mode : HOR_VP8_PRED;
/**
 * Edge-fix a 4x4 intra mode for the emu-edge path.  Modes that sample
 * missing neighbour pixels are substituted; modes listed at the bottom
 * instead set *copy_buf so the caller predicts into a padded scratch
 * buffer.  NOTE(review): the switch skeleton, several case labels and
 * the *copy_buf assignments are missing from this excerpt; code left
 * byte-identical.
 */
883 static av_always_inline
884 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
893 case DIAG_DOWN_LEFT_PRED:
895 return !mb_y ? DC_127_PRED : mode;
903 return !mb_x ? DC_129_PRED : mode;
905 return check_tm_pred4x4_mode(mode, mb_x, mb_y);
906 case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
907 case DIAG_DOWN_RIGHT_PRED:
908 case VERT_RIGHT_PRED:
/**
 * Perform intra prediction (and residual IDCT for I4x4 sub-blocks) for
 * one macroblock: luma 16x16 or sixteen 4x4 predictions plus both chroma
 * 8x8 planes.  Handles frame-edge mode substitution for both the padded
 * and CODEC_FLAG_EMU_EDGE paths, using a small copy buffer when a 4x4
 * mode needs pixels that don't exist at the edge.
 * NOTE(review): many lines are missing from this excerpt (braces,
 * declarations of mode/nnz/tr, several copy_dst fill branches and the
 * topright/copy plumbing); code left byte-identical.
 */
917 static av_always_inline
918 void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
921 AVCodecContext *avctx = s->avctx;
925 // for the first row, we need to run xchg_mb_border to init the top edge to 127
926 // otherwise, skip it if we aren't going to deblock
927 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
928 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
929 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
930 s->filter.simple, 1);
// Whole-macroblock luma mode (modes below MODE_I4x4).
932 if (mb->mode < MODE_I4x4) {
933 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
934 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
936 mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
938 s->hpc.pred16x16[mode](dst[0], s->linesize);
// I4x4 path: predict and reconstruct the 16 luma sub-blocks one by one.
940 uint8_t *ptr = dst[0];
941 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
942 uint8_t tr_top[4] = { 127, 127, 127, 127 };
944 // all blocks on the right edge of the macroblock use bottom edge
945 // the top macroblock for their topright edge
946 uint8_t *tr_right = ptr - s->linesize + 16;
948 // if we're on the right edge of the frame, said edge is extended
949 // from the top macroblock
950 if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
951 mb_x == s->mb_width-1) {
952 tr = tr_right[-1]*0x01010101u;
953 tr_right = (uint8_t *)&tr;
957 AV_ZERO128(s->non_zero_count_cache);
959 for (y = 0; y < 4; y++) {
960 uint8_t *topright = ptr + 4 - s->linesize;
961 for (x = 0; x < 4; x++) {
962 int copy = 0, linesize = s->linesize;
963 uint8_t *dst = ptr+4*x;
// 5 rows x 8 bytes scratch: top row + 4 prediction rows with a
// left-edge column, used when 'copy' is set.
964 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
966 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
971 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
972 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, ©);
// Fill the scratch buffer's top row: 127s when no row above exists,
// otherwise the actual pixels (and top-left corner).
978 AV_WN32A(copy_dst+4, 127U * 0x01010101U);
980 AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
984 copy_dst[3] = ptr[4*x-s->linesize-1];
// Left-edge column of the scratch buffer (one byte per row).
993 copy_dst[11] = ptr[4*x -1];
994 copy_dst[19] = ptr[4*x+s->linesize -1];
995 copy_dst[27] = ptr[4*x+s->linesize*2-1];
996 copy_dst[35] = ptr[4*x+s->linesize*3-1];
1002 s->hpc.pred4x4[mode](dst, topright, linesize);
// Copy the predicted 4x4 block back out of the scratch buffer.
1004 AV_COPY32(ptr+4*x , copy_dst+12);
1005 AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
1006 AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
1007 AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
// Add the residual: DC-only fast path vs full IDCT.
1010 nnz = s->non_zero_count_cache[y][x];
1013 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
1015 s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
1020 ptr += 4*s->linesize;
// Chroma: one 8x8 prediction per plane with the same edge fixing.
1025 if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1026 mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode, mb_x, mb_y);
1028 mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x, mb_y);
1030 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1031 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
// Restore the border row swapped out at the top of this function.
1033 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
1034 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1035 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1036 s->filter.simple, 0);
// Per-subpel-phase (mv & 7) lookup used by the MC edge-emulation logic:
// row 0 doubles as the mc_func table index.  Closing brace is outside
// this excerpt; code left byte-identical.
1039 static const uint8_t subpel_idx[3][8] = {
1040 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1041 // also function pointer index
1042 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1043 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1049 * @param s VP8 decoding context
1050 * @param dst target buffer for block data at block position
1051 * @param ref reference picture buffer at origin (0, 0)
1052 * @param mv motion vector (relative to block position) to get pixel data from
1053 * @param x_off horizontal position of block from origin (0, 0)
1054 * @param y_off vertical position of block from origin (0, 0)
1055 * @param block_w width of block (16, 8 or 4)
1056 * @param block_h height of block (always same as block_w)
1057 * @param width width of src/dst plane data
1058 * @param height height of src/dst plane data
1059 * @param linesize size of a single line of plane data, including padding
1060 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
/* Luma motion compensation with frame-thread synchronisation
 * (ff_thread_await_progress on the source rows) and edge emulation when
 * the filtered read overruns the plane.  NOTE(review): braces and the
 * subpel/full-pel branch guards are missing from this excerpt; code left
 * byte-identical. */
1062 static av_always_inline
1063 void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv,
1064 int x_off, int y_off, int block_w, int block_h,
1065 int width, int height, int linesize,
1066 vp8_mc_func mc_func[3][3])
1068 uint8_t *src = ref->data[0];
// Luma MVs are quarter-pel; <<1 converts to the eighth-pel phase grid
// shared with chroma, low 3 bits select the subpel filter.
1072 int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1073 int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
1075 x_off += mv->x >> 2;
1076 y_off += mv->y >> 2;
// Wait until the reference thread has decoded the rows we will read.
1079 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1080 src += y_off * linesize + x_off;
// Filter taps read subpel_idx[1] extra rows/cols; emulate the edge if
// that footprint leaves the plane.
1081 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1082 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1083 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
1084 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1085 x_off - mx_idx, y_off - my_idx, width, height);
1086 src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1088 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
// Full-pel path: plain copy via mc_func[0][0].
1090 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1091 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
1096 * chroma MC function
1098 * @param s VP8 decoding context
1099 * @param dst1 target buffer for block data at block position (U plane)
1100 * @param dst2 target buffer for block data at block position (V plane)
1101 * @param ref reference picture buffer at origin (0, 0)
1102 * @param mv motion vector (relative to block position) to get pixel data from
1103 * @param x_off horizontal position of block from origin (0, 0)
1104 * @param y_off vertical position of block from origin (0, 0)
1105 * @param block_w width of block (16, 8 or 4)
1106 * @param block_h height of block (always same as block_w)
1107 * @param width width of src/dst plane data
1108 * @param height height of src/dst plane data
1109 * @param linesize size of a single line of plane data, including padding
1110 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
/* Same structure as vp8_mc_luma but processes U and V together; chroma
 * MVs are eighth-pel, so no <<1 phase conversion is needed.
 * NOTE(review): braces and branch guards are missing from this excerpt;
 * code left byte-identical. */
1112 static av_always_inline
1113 void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref,
1114 const VP56mv *mv, int x_off, int y_off,
1115 int block_w, int block_h, int width, int height, int linesize,
1116 vp8_mc_func mc_func[3][3])
1118 uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
1121 int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1122 int my = mv->y&7, my_idx = subpel_idx[0][my];
1124 x_off += mv->x >> 3;
1125 y_off += mv->y >> 3;
1128 src1 += y_off * linesize + x_off;
1129 src2 += y_off * linesize + x_off;
// >>3: chroma planes are half-height, so 8 chroma rows per MB row.
1130 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1131 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1132 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
// Edge-emulate and filter each chroma plane separately; the single
// edge_emu_buffer is reused for U then V.
1133 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
1134 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1135 x_off - mx_idx, y_off - my_idx, width, height);
1136 src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1137 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1139 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
1140 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1141 x_off - mx_idx, y_off - my_idx, width, height);
1142 src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1143 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
// In-bounds subpel path: filter both planes directly from the frame.
1145 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1146 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
// Full-pel path.
1149 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1150 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1151 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
/* Motion-compensate one partition of a macroblock: luma block plus the
 * corresponding half-size chroma block (4:2:0). (bx_off,by_off) is the
 * partition's offset inside the MB; (x_off,y_off) the MB's frame position. */
1155 static av_always_inline
1156 void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
1157 AVFrame *ref_frame, int x_off, int y_off,
1158 int bx_off, int by_off,
1159 int block_w, int block_h,
1160 int width, int height, VP56mv *mv)
/* Luma MC; table index selects 16x16 vs 8x8 put functions. */
1165 vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
1166 ref_frame, mv, x_off + bx_off, y_off + by_off,
1167 block_w, block_h, width, height, s->linesize,
1168 s->put_pixels_tab[block_w == 8]);
/* Profile 3: chroma MVs have their fractional bits handled differently
 * (uvmv setup lines are not visible in this listing -- verify upstream). */
1171 if (s->profile == 3) {
/* Chroma is subsampled 2x in both directions: halve all geometry. */
1175 x_off >>= 1; y_off >>= 1;
1176 bx_off >>= 1; by_off >>= 1;
1177 width >>= 1; height >>= 1;
1178 block_w >>= 1; block_h >>= 1;
1179 vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
1180 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1181 &uvmv, x_off + bx_off, y_off + by_off,
1182 block_w, block_h, width, height, s->uvlinesize,
1183 s->put_pixels_tab[1 + (block_w == 4)]);
1186 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1187  * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1188 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1190 /* Don't prefetch refs that haven't been used very often this frame. */
1191 if (s->ref_count[ref-1] > (mb_xy >> 5)) {
/* Convert the quarter-pel MV to full-pel and aim ~4 MBs ahead (+64). */
1192 int x_off = mb_x << 4, y_off = mb_y << 4;
1193 int mx = (mb->mv.x>>2) + x_off + 8;
1194 int my = (mb->mv.y>>2) + y_off;
1195 uint8_t **src= s->framep[ref]->data;
1196 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1197 /* For threading, a ff_thread_await_progress here might be useful, but
1198  * it actually slows down the decoder. Since a bad prefetch doesn't
1199  * generate bad decoder output, we don't run it here. */
1200 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* Chroma: U and V are prefetched together via their plane distance. */
1201 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1202 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1207 * Apply motion vectors to prediction buffer, chapter 18.
1209 static av_always_inline
1210 void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
/* Dispatch on the MB's split mode; each case motion-compensates the
 * whole 16x16 luma area plus the 8x8 chroma area. */
1213 int x_off = mb_x << 4, y_off = mb_y << 4;
1214 int width = 16*s->mb_width, height = 16*s->mb_height;
1215 AVFrame *ref = s->framep[mb->ref_frame];
1216 VP56mv *bmv = mb->bmv;
1218 switch (mb->partitioning) {
1219 case VP8_SPLITMVMODE_NONE:
1220 vp8_mc_part(s, dst, ref, x_off, y_off,
1221 0, 0, 16, 16, width, height, &mb->mv);
1223 case VP8_SPLITMVMODE_4x4: {
/* 16 separate 4x4 luma blocks, one MV each (bmv[0..15]). */
1228 for (y = 0; y < 4; y++) {
1229 for (x = 0; x < 4; x++) {
1230 vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
1232 4*x + x_off, 4*y + y_off, 4, 4,
1233 width, height, s->linesize,
1234 s->put_pixels_tab[2]);
/* Chroma: each 4x4 chroma block averages the four covering luma MVs. */
1239 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1240 for (y = 0; y < 2; y++) {
1241 for (x = 0; x < 2; x++) {
1242 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1243 mb->bmv[ 2*y * 4 + 2*x+1].x +
1244 mb->bmv[(2*y+1) * 4 + 2*x ].x +
1245 mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1246 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1247 mb->bmv[ 2*y * 4 + 2*x+1].y +
1248 mb->bmv[(2*y+1) * 4 + 2*x ].y +
1249 mb->bmv[(2*y+1) * 4 + 2*x+1].y;
/* Round-to-nearest /4 that also works for negative sums: the
 * (v >> (INT_BIT-1)) term subtracts 1 for negative values. */
1250 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1251 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
1252 if (s->profile == 3) {
1256 vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
1257 dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1258 4*x + x_off, 4*y + y_off, 4, 4,
1259 width, height, s->uvlinesize,
1260 s->put_pixels_tab[2]);
/* Two horizontal halves, one MV each. */
1265 case VP8_SPLITMVMODE_16x8:
1266 vp8_mc_part(s, dst, ref, x_off, y_off,
1267 0, 0, 16, 8, width, height, &bmv[0]);
1268 vp8_mc_part(s, dst, ref, x_off, y_off,
1269 0, 8, 16, 8, width, height, &bmv[1]);
/* Two vertical halves, one MV each. */
1271 case VP8_SPLITMVMODE_8x16:
1272 vp8_mc_part(s, dst, ref, x_off, y_off,
1273 0, 0, 8, 16, width, height, &bmv[0]);
1274 vp8_mc_part(s, dst, ref, x_off, y_off,
1275 8, 0, 8, 16, width, height, &bmv[1]);
/* Four 8x8 quadrants, one MV each. */
1277 case VP8_SPLITMVMODE_8x8:
1278 vp8_mc_part(s, dst, ref, x_off, y_off,
1279 0, 0, 8, 8, width, height, &bmv[0]);
1280 vp8_mc_part(s, dst, ref, x_off, y_off,
1281 8, 0, 8, 8, width, height, &bmv[1]);
1282 vp8_mc_part(s, dst, ref, x_off, y_off,
1283 0, 8, 8, 8, width, height, &bmv[2]);
1284 vp8_mc_part(s, dst, ref, x_off, y_off,
1285 8, 8, 8, 8, width, height, &bmv[3]);
/* Add the inverse-transformed residual to the predicted macroblock.
 * non_zero_count_cache packs per-4x4-subblock nnz as bytes; reading 4 at
 * once via AV_RL32 lets a whole row be tested/skipped in one compare. */
1290 static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
1294 if (mb->mode != MODE_I4x4) {
1295 uint8_t *y_dst = dst[0];
1296 for (y = 0; y < 4; y++) {
1297 uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
/* Any subblock in this row with nnz > 1 -> per-block full/DC idct. */
1299 if (nnz4&~0x01010101) {
1300 for (x = 0; x < 4; x++) {
/* nnz == 1: only the DC coefficient is non-zero. */
1301 if ((uint8_t)nnz4 == 1)
1302 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
1303 else if((uint8_t)nnz4 > 1)
1304 s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
/* All-DC row: one call handles the four subblocks at once. */
1310 s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
1313 y_dst += 4*s->linesize;
/* Chroma residual, same scheme over the 2x2 subblock grid per plane. */
1317 for (ch = 0; ch < 2; ch++) {
1318 uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
1320 uint8_t *ch_dst = dst[1+ch];
1321 if (nnz4&~0x01010101) {
1322 for (y = 0; y < 2; y++) {
1323 for (x = 0; x < 2; x++) {
1324 if ((uint8_t)nnz4 == 1)
1325 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
1326 else if((uint8_t)nnz4 > 1)
1327 s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
1330 goto chroma_idct_end;
1332 ch_dst += 4*s->uvlinesize;
1335 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
/* Compute the loop-filter strength for one macroblock (RFC 6386 ch. 15):
 * base level from segmentation or the frame header, plus per-ref/per-mode
 * deltas, clipped to [0,63]; derive the interior (inner-edge) limit. */
1342 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
1344 int interior_limit, filter_level;
1346 if (s->segmentation.enabled) {
1347 filter_level = s->segmentation.filter_level[s->segment];
/* Segment value is a delta unless absolute_vals is set. */
1348 if (!s->segmentation.absolute_vals)
1349 filter_level += s->filter.level;
1351 filter_level = s->filter.level;
1353 if (s->lf_delta.enabled) {
1354 filter_level += s->lf_delta.ref[mb->ref_frame];
1355 filter_level += s->lf_delta.mode[mb->mode];
/* Clamp to the 6-bit range the filter tables expect. */
1358 filter_level = av_clip_uintp2(filter_level, 6);
1360 interior_limit = filter_level;
1361 if (s->filter.sharpness) {
1362 interior_limit >>= (s->filter.sharpness + 3) >> 2;
1363 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1365 interior_limit = FFMAX(interior_limit, 1);
1367 f->filter_level = filter_level;
1368 f->inner_limit = interior_limit;
/* Inner edges are filtered unless the MB is a skipped whole-MB
 * prediction (no residual, single MV). */
1369 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
/* Apply the normal (full) loop filter to one macroblock: MB edges use the
 * stronger mbedge limit, interior 4-pel edges the bedge limit. The guard
 * conditions on mb_x/mb_y/inner_filter are not all visible in this
 * lossy listing -- code left byte-identical. */
1372 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
1374 int mbedge_lim, bedge_lim, hev_thresh;
1375 int filter_level = f->filter_level;
1376 int inner_limit = f->inner_limit;
1377 int inner_filter = f->inner_filter;
1378 int linesize = s->linesize;
1379 int uvlinesize = s->uvlinesize;
/* High-edge-variance threshold, indexed by [keyframe][filter_level]
 * (table rows truncated in this listing). */
1380 static const uint8_t hev_thresh_lut[2][64] = {
1381 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1382 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1383 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1385 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1386 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1387 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1394 bedge_lim = 2*filter_level + inner_limit;
1395 mbedge_lim = bedge_lim + 4;
1397 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
/* Left MB edge (horizontal filtering of the vertical edge). */
1400 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
1401 mbedge_lim, inner_limit, hev_thresh);
1402 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
1403 mbedge_lim, inner_limit, hev_thresh);
/* Interior vertical edges at x = 4, 8, 12 (luma) and x = 4 (chroma). */
1407 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
1408 inner_limit, hev_thresh);
1409 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
1410 inner_limit, hev_thresh);
1411 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
1412 inner_limit, hev_thresh);
1413 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
1414 uvlinesize, bedge_lim,
1415 inner_limit, hev_thresh);
/* Top MB edge (vertical filtering of the horizontal edge). */
1419 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
1420 mbedge_lim, inner_limit, hev_thresh);
1421 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
1422 mbedge_lim, inner_limit, hev_thresh);
/* Interior horizontal edges at y = 4, 8, 12 (luma) and y = 4 (chroma). */
1426 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
1427 linesize, bedge_lim,
1428 inner_limit, hev_thresh);
1429 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
1430 linesize, bedge_lim,
1431 inner_limit, hev_thresh);
1432 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
1433 linesize, bedge_lim,
1434 inner_limit, hev_thresh);
1435 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
1436 dst[2] + 4 * uvlinesize,
1437 uvlinesize, bedge_lim,
1438 inner_limit, hev_thresh);
/* Simple loop filter: luma only, no high-edge-variance logic.
 * Same edge layout as filter_mb (MB edge + interior edges at 4/8/12). */
1442 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
1444 int mbedge_lim, bedge_lim;
1445 int filter_level = f->filter_level;
1446 int inner_limit = f->inner_limit;
1447 int inner_filter = f->inner_filter;
1448 int linesize = s->linesize;
1453 bedge_lim = 2*filter_level + inner_limit;
1454 mbedge_lim = bedge_lim + 4;
/* Vertical edges (left MB edge, then interior). */
1457 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
1459 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1460 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1461 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
/* Horizontal edges (top MB edge, then interior). */
1465 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
1467 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1468 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1469 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
/* Run the full loop filter across one row of macroblocks, saving the
 * bottom border of each MB first for next row's intra prediction. */
1473 static void filter_mb_row(VP8Context *s, AVFrame *curframe, int mb_y)
1475 VP8FilterStrength *f = s->filter_strength;
1477 curframe->data[0] + 16*mb_y*s->linesize,
1478 curframe->data[1] + 8*mb_y*s->uvlinesize,
1479 curframe->data[2] + 8*mb_y*s->uvlinesize
1483 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
1484 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1485 filter_mb(s, dst, f++, mb_x, mb_y);
/* Same as filter_mb_row but for the simple filter (luma plane only). */
1492 static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y)
1494 VP8FilterStrength *f = s->filter_strength;
1495 uint8_t *dst = curframe->data[0] + 16*mb_y*s->linesize;
1498 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
1499 backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
1500 filter_mb_simple(s, dst, f++, mb_x, mb_y);
/* Decode one VP8 frame: parse the header, pick/acquire a frame buffer,
 * rotate the reference-frame pointers, then decode MB rows (mode + coeffs
 * + prediction + idct), loop-filtering and reporting progress per row.
 * NOTE(review): many structural lines (braces, returns, labels) are
 * missing from this lossy listing; code left byte-identical. */
1505 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
1508 VP8Context *s = avctx->priv_data;
1509 int ret, mb_x, mb_y, i, y, referenced;
1510 enum AVDiscard skip_thresh;
1511 AVFrame *av_uninit(curframe), *prev_frame = s->framep[VP56_FRAME_CURRENT];
1513 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
/* The frame is "referenced" if any reference slot will point at it. */
1516 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
1517 || s->update_altref == VP56_FRAME_CURRENT;
1519 skip_thresh = !referenced ? AVDISCARD_NONREF :
1520 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
1522 if (avctx->skip_frame >= skip_thresh) {
1526 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
1528 // release no longer referenced frames
1529 for (i = 0; i < 5; i++)
1530 if (s->frames[i].data[0] &&
1531 &s->frames[i] != prev_frame &&
1532 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1533 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1534 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1535 ff_thread_release_buffer(avctx, &s->frames[i]);
1537 // find a free buffer
1538 for (i = 0; i < 5; i++)
1539 if (&s->frames[i] != prev_frame &&
1540 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1541 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1542 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1543 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
/* 5 frame slots vs 4 references: one must always be free. */
1547 av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
1550 if (curframe->data[0])
1551 ff_thread_release_buffer(avctx, curframe);
1553 curframe->key_frame = s->keyframe;
1554 curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1555 curframe->reference = referenced ? 3 : 0;
/* ref_index[0] doubles as the per-MB segmentation map carrier so it
 * survives across threads with the frame. */
1556 curframe->ref_index[0] = s->segmentation_map;
1557 if ((ret = ff_thread_get_buffer(avctx, curframe))) {
1558 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
1562 // check if golden and altref are swapped
1563 if (s->update_altref != VP56_FRAME_NONE) {
1564 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
1566 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
1568 if (s->update_golden != VP56_FRAME_NONE) {
1569 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
1571 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
1573 if (s->update_last) {
1574 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
1576 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
1578 s->next_framep[VP56_FRAME_CURRENT] = curframe;
/* Frame threading: header/setup done, next thread may start. */
1580 ff_thread_finish_setup(avctx);
1582 // Given that arithmetic probabilities are updated every frame, it's quite likely
1583 // that the values we have on a random interframe are complete junk if we didn't
1584 // start decode on a keyframe. So just don't display anything rather than junk.
1585 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1586 !s->framep[VP56_FRAME_GOLDEN] ||
1587 !s->framep[VP56_FRAME_GOLDEN2])) {
1588 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1589 return AVERROR_INVALIDDATA;
1592 s->linesize = curframe->linesize[0];
1593 s->uvlinesize = curframe->linesize[1];
/* 21 rows: enough for a 16-high block plus sixtap filter margins. */
1595 if (!s->edge_emu_buffer)
1596 s->edge_emu_buffer = av_malloc(21*s->linesize);
1598 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1600 /* Zero macroblock structures for top/top-left prediction from outside the frame. */
1601 memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
1603 // top edge of 127 for intra prediction
1604 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1605 s->top_border[0][15] = s->top_border[0][23] = 127;
1606 memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
1608 memset(s->ref_count, 0, sizeof(s->ref_count));
1610 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
/* MV clamping range: quarter-pel units, one MB (16<<2) beyond the edge. */
1612 #define MARGIN (16 << 2)
1613 s->mv_min.y = -MARGIN;
1614 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1616 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
1617 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
1618 VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1619 int mb_xy = mb_y*s->mb_width;
1621 curframe->data[0] + 16*mb_y*s->linesize,
1622 curframe->data[1] + 8*mb_y*s->uvlinesize,
1623 curframe->data[2] + 8*mb_y*s->uvlinesize
1626 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
1627 memset(s->left_nnz, 0, sizeof(s->left_nnz));
1628 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1630 // left edge of 129 for intra prediction
1631 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1632 for (i = 0; i < 3; i++)
1633 for (y = 0; y < 16>>!!i; y++)
1634 dst[i][y*curframe->linesize[i]-1] = 129;
1635 if (mb_y == 1) // top left edge is also 129
1636 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
1639 s->mv_min.x = -MARGIN;
1640 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
/* Need the previous frame's segmentation map if it isn't re-coded. */
1641 if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
1642 ff_thread_await_progress(prev_frame, mb_y, 0);
1644 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1645 /* Prefetch the current frame, 4 MBs ahead */
1646 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1647 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1649 decode_mb_mode(s, mb, mb_x, mb_y, s->segmentation_map + mb_xy,
1650 prev_frame ? prev_frame->ref_index[0] + mb_xy : NULL);
1652 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
1655 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
1657 if (mb->mode <= MODE_I4x4)
1658 intra_predict(s, dst, mb, mb_x, mb_y);
1660 inter_predict(s, dst, mb, mb_x, mb_y);
1662 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
1665 idct_mb(s, dst, mb);
/* Skipped MB: clear the nnz context instead of running the idct. */
1667 AV_ZERO64(s->left_nnz);
1668 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
1670 // Reset DC block predictors if they would exist if the mb had coefficients
1671 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
1673 s->top_nnz[mb_x][8] = 0;
1677 if (s->deblock_filter)
1678 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);
1680 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
1688 if (s->deblock_filter) {
1689 if (s->filter.simple)
1690 filter_mb_row_simple(s, curframe, mb_y);
1692 filter_mb_row(s, curframe, mb_y);
1697 ff_thread_report_progress(curframe, mb_y, 0);
1700 ff_thread_report_progress(curframe, INT_MAX, 0);
1702 // if future frames don't use the updated probabilities,
1703 // reset them to the values we saved
1704 if (!s->update_probabilities)
1705 s->prob[0] = s->prob[1];
/* Commit the new reference-frame pointer set for the next frame. */
1707 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
1709 if (!s->invisible) {
1710 *(AVFrame*)data = *curframe;
1711 *data_size = sizeof(AVFrame);
/* Decoder init: VP8 is always 8-bit 4:2:0; set up the DSP, the shared
 * H.264 intra-prediction helpers, and the VP8-specific DSP table. */
1717 static av_cold int vp8_decode_init(AVCodecContext *avctx)
1719 VP8Context *s = avctx->priv_data;
1722 avctx->pix_fmt = PIX_FMT_YUV420P;
1724 dsputil_init(&s->dsp, avctx);
1725 ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8, 1);
1726 ff_vp8dsp_init(&s->vp8dsp);
/* Decoder close: all per-context teardown lives in vp8_decode_flush. */
1731 static av_cold int vp8_decode_free(AVCodecContext *avctx)
1733 vp8_decode_flush(avctx);
/* Frame-threading worker init (body truncated in this listing --
 * presumably re-binds per-thread context state; verify upstream). */
1737 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
1739 VP8Context *s = avctx->priv_data;
/* Translate a frame pointer from the source context's frames[] array to
 * the equivalent slot in the destination context (NULL stays NULL). */
1746 #define REBASE(pic) \
1747 pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
/* Copy inter-frame state from the previous thread's context: coding
 * probabilities, segmentation/loop-filter params, sign biases, and the
 * reference-frame pointers (rebased into this context's frames[]). */
1749 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
1751 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
1753 s->prob[0] = s_src->prob[!s_src->update_probabilities];
1754 s->segmentation = s_src->segmentation;
1755 s->lf_delta = s_src->lf_delta;
1756 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
1758 memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
1759 s->framep[0] = REBASE(s_src->next_framep[0]);
1760 s->framep[1] = REBASE(s_src->next_framep[1]);
1761 s->framep[2] = REBASE(s_src->next_framep[2]);
1762 s->framep[3] = REBASE(s_src->next_framep[3]);
1767 AVCodec ff_vp8_decoder = {
1769 .type = AVMEDIA_TYPE_VIDEO,
1771 .priv_data_size = sizeof(VP8Context),
1772 .init = vp8_decode_init,
1773 .close = vp8_decode_free,
1774 .decode = vp8_decode_frame,
1775 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
1776 .flush = vp8_decode_flush,
1777 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
1778 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
1779 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),