2 * VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Jason Garrett-Glaser
8 * This file is part of Libav.
10 * Libav is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * Libav is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with Libav; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "libavutil/imgutils.h"
29 #include "rectangle.h"
/* Free all per-stream buffers and reference frames when flushing the decoder.
 * NOTE(review): this extraction appears to be missing interior lines
 * (braces, the `int i;` declaration) — do not treat as compilable as-is. */
36 static void vp8_decode_flush(AVCodecContext *avctx)
38 VP8Context *s = avctx->priv_data;
/* frame-thread copies share the frame buffers; only the owner releases them */
41 if (!avctx->is_copy) {
42 for (i = 0; i < 5; i++)
43 if (s->frames[i].data[0])
44 ff_thread_release_buffer(avctx, &s->frames[i]);
46 memset(s->framep, 0, sizeof(s->framep));
/* per-dimension scratch buffers; reallocated by update_dimensions() */
48 av_freep(&s->macroblocks_base);
49 av_freep(&s->filter_strength);
50 av_freep(&s->intra4x4_pred_mode_top);
51 av_freep(&s->top_nnz);
52 av_freep(&s->edge_emu_buffer);
53 av_freep(&s->top_border);
54 av_freep(&s->segmentation_map);
/* s->macroblocks points into macroblocks_base; clear the alias too */
56 s->macroblocks = NULL;
/* (Re)allocate all per-frame-size buffers for a new coded width/height.
 * Returns 0 on success, AVERROR_INVALIDDATA for bad dimensions or
 * AVERROR(ENOMEM) on allocation failure.
 * NOTE(review): interior lines (braces) are missing from this extraction. */
59 static int update_dimensions(VP8Context *s, int width, int height)
61 if (width != s->avctx->width ||
62 height != s->avctx->height) {
63 if (av_image_check_size(width, height, 0, s->avctx))
64 return AVERROR_INVALIDDATA;
/* dimensions changed: drop old buffers/references before resizing */
66 vp8_decode_flush(s->avctx);
68 avcodec_set_dimensions(s->avctx, width, height);
/* macroblock grid is 16x16 */
71 s->mb_width = (s->avctx->coded_width +15) / 16;
72 s->mb_height = (s->avctx->coded_height+15) / 16;
74 s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
75 s->filter_strength = av_mallocz(s->mb_width*sizeof(*s->filter_strength));
76 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
77 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
78 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
79 s->segmentation_map = av_mallocz(s->mb_width*s->mb_height);
/* check every allocation before use */
81 if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
82 !s->top_nnz || !s->top_border || !s->segmentation_map)
83 return AVERROR(ENOMEM);
/* +1 so mb[-1] (left neighbour of the first column) is valid */
85 s->macroblocks = s->macroblocks_base + 1;
/* Parse the segmentation header: per-segment quantizer and filter-level
 * deltas plus the tree probabilities used to decode per-MB segment ids.
 * NOTE(review): braces and the `int i;` declaration are missing here. */
90 static void parse_segment_info(VP8Context *s)
92 VP56RangeCoder *c = &s->c;
95 s->segmentation.update_map = vp8_rac_get(c);
97 if (vp8_rac_get(c)) { // update segment feature data
/* absolute_vals: values replace the base quantizer instead of adding to it */
98 s->segmentation.absolute_vals = vp8_rac_get(c);
100 for (i = 0; i < 4; i++)
101 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
103 for (i = 0; i < 4; i++)
104 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
106 if (s->segmentation.update_map)
107 for (i = 0; i < 3; i++)
/* 255 means "use the default probability" for that tree node */
108 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
/* Read loop-filter level deltas: per-reference-frame and per-prediction-mode.
 * NOTE(review): the original guards each read with an update flag; those
 * lines appear to be missing from this extraction. */
111 static void update_lf_deltas(VP8Context *s)
113 VP56RangeCoder *c = &s->c;
116 for (i = 0; i < 4; i++)
117 s->lf_delta.ref[i] = vp8_rac_get_sint(c, 6);
119 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++)
120 s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6);
/* Initialize one range decoder per DCT coefficient partition.
 * The partition count (1/2/4/8) is coded as a 2-bit log2; the sizes of all
 * but the last partition are stored as 24-bit LE values before the data.
 * NOTE(review): braces, loop-body advance of buf/buf_size and the error
 * return appear to be missing from this extraction. */
123 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
125 const uint8_t *sizes = buf;
128 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
/* skip the size table (3 bytes per partition except the last) */
130 buf += 3*(s->num_coeff_partitions-1);
131 buf_size -= 3*(s->num_coeff_partitions-1);
135 for (i = 0; i < s->num_coeff_partitions-1; i++) {
136 int size = AV_RL24(sizes + 3*i);
/* validate each declared size against the bytes actually available */
137 if (buf_size - size < 0)
140 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
/* the final partition takes all remaining bytes */
144 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
/* Build the per-segment dequantization tables from the base quantizer index
 * and the five plane/coefficient-type deltas coded in the frame header.
 * NOTE(review): some lines (base_qi setup for the non-segmented case and the
 * relative-delta addition) are missing from this extraction. */
149 static void get_quants(VP8Context *s)
151 VP56RangeCoder *c = &s->c;
154 int yac_qi = vp8_rac_get_uint(c, 7);
155 int ydc_delta = vp8_rac_get_sint(c, 4);
156 int y2dc_delta = vp8_rac_get_sint(c, 4);
157 int y2ac_delta = vp8_rac_get_sint(c, 4);
158 int uvdc_delta = vp8_rac_get_sint(c, 4);
159 int uvac_delta = vp8_rac_get_sint(c, 4);
161 for (i = 0; i < 4; i++) {
162 if (s->segmentation.enabled) {
163 base_qi = s->segmentation.base_quant[i];
164 if (!s->segmentation.absolute_vals)
/* qindex + delta is clamped to the 7-bit lookup-table range */
169 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
170 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)];
171 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
172 s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100;
173 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
174 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
/* spec-mandated floors/ceilings on two of the factors */
176 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
177 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
182 * Determine which buffers golden and altref should be updated with after this frame.
183 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
185 * Intra frames update all 3 references
186 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
187 * If the update (golden|altref) flag is set, it's updated with the current frame
188 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
189 * If the flag is not set, the number read means:
191 * 1: VP56_FRAME_PREVIOUS
192 * 2: update golden with altref, or update altref with golden
/* NOTE(review): the `if (update)` guard, case labels and braces appear to
 * be missing from this extraction. */
194 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
196 VP56RangeCoder *c = &s->c;
199 return VP56_FRAME_CURRENT;
201 switch (vp8_rac_get_uint(c, 2)) {
203 return VP56_FRAME_PREVIOUS;
/* cross-copy: golden takes altref and vice versa */
205 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
207 return VP56_FRAME_NONE;
210 static void update_refs(VP8Context *s)
212 VP56RangeCoder *c = &s->c;
214 int update_golden = vp8_rac_get(c);
215 int update_altref = vp8_rac_get(c);
217 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
218 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
/* Parse the uncompressed frame tag plus the compressed frame header:
 * key/inter flag, profile, dimensions (keyframe only), segmentation,
 * loop filter, partitions, quantizers, reference handling and all
 * probability updates. Returns 0 or a negative AVERROR code.
 * NOTE(review): many interior lines (braces, if/else branches, keyframe
 * guards) are missing from this extraction. */
221 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
223 VP56RangeCoder *c = &s->c;
224 int header_size, hscale, vscale, i, j, k, l, m, ret;
225 int width = s->avctx->width;
226 int height = s->avctx->height;
/* 3-byte uncompressed frame tag */
228 s->keyframe = !(buf[0] & 1);
229 s->profile = (buf[0]>>1) & 7;
230 s->invisible = !(buf[0] & 0x10);
231 header_size = AV_RL24(buf) >> 5;
236 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
/* profile 0 uses the 6-tap subpel filters, others the bilinear ones */
239 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
240 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
241 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
243 if (header_size > buf_size - 7*s->keyframe) {
244 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
245 return AVERROR_INVALIDDATA;
/* keyframes carry a start code and the frame dimensions */
249 if (AV_RL24(buf) != 0x2a019d) {
250 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
251 return AVERROR_INVALIDDATA;
253 width = AV_RL16(buf+3) & 0x3fff;
254 height = AV_RL16(buf+5) & 0x3fff;
255 hscale = buf[4] >> 6;
256 vscale = buf[6] >> 6;
260 if (hscale || vscale)
261 av_log_missing_feature(s->avctx, "Upscaling", 1);
/* keyframes reset all probabilities and reference handling to defaults */
263 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
264 for (i = 0; i < 4; i++)
265 for (j = 0; j < 16; j++)
266 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
267 sizeof(s->prob->token[i][j]));
268 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
269 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
270 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
271 memset(&s->segmentation, 0, sizeof(s->segmentation));
274 if (!s->macroblocks_base || /* first frame */
275 width != s->avctx->width || height != s->avctx->height) {
/* NOTE(review): BUG — misplaced parenthesis: this assigns the boolean
 * (update_dimensions(...) < 0) to ret instead of the return value.
 * Should be: if ((ret = update_dimensions(s, width, height)) < 0) */
276 if ((ret = update_dimensions(s, width, height) < 0))
280 ff_vp56_init_range_decoder(c, buf, header_size);
282 buf_size -= header_size;
286 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
287 vp8_rac_get(c); // whether we can skip clamping in dsp functions
290 if ((s->segmentation.enabled = vp8_rac_get(c)))
291 parse_segment_info(s);
293 s->segmentation.update_map = 0; // FIXME: move this to some init function?
295 s->filter.simple = vp8_rac_get(c);
296 s->filter.level = vp8_rac_get_uint(c, 6);
297 s->filter.sharpness = vp8_rac_get_uint(c, 3);
299 if ((s->lf_delta.enabled = vp8_rac_get(c)))
303 if (setup_partitions(s, buf, buf_size)) {
304 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
305 return AVERROR_INVALIDDATA;
312 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
313 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
316 // if we aren't saving this frame's probabilities for future frames,
317 // make a copy of the current probabilities
318 if (!(s->update_probabilities = vp8_rac_get(c)))
319 s->prob[1] = s->prob[0];
321 s->update_last = s->keyframe || vp8_rac_get(c);
/* token probability updates, 13.4: one branch bit per (band,ctx,token) */
323 for (i = 0; i < 4; i++)
324 for (j = 0; j < 8; j++)
325 for (k = 0; k < 3; k++)
326 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
327 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
328 int prob = vp8_rac_get_uint(c, 8);
/* the update is fanned out to every coeff index in this band */
329 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
330 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
333 if ((s->mbskip_enabled = vp8_rac_get(c)))
334 s->prob->mbskip = vp8_rac_get_uint(c, 8);
337 s->prob->intra = vp8_rac_get_uint(c, 8);
338 s->prob->last = vp8_rac_get_uint(c, 8);
339 s->prob->golden = vp8_rac_get_uint(c, 8);
342 for (i = 0; i < 4; i++)
343 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
345 for (i = 0; i < 3; i++)
346 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
348 // 17.2 MV probability update
349 for (i = 0; i < 2; i++)
350 for (j = 0; j < 19; j++)
351 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
352 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
358 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
360 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
361 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
365 * Motion vector coding, 17.1.
/* Decode one signed motion-vector component.
 * NOTE(review): interior lines (declarations of i/x/bit, the short-vector
 * tree branch, braces and ps advancement) are missing from this extraction. */
367 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
/* long-vector path: magnitude coded bitwise against per-bit probabilities */
371 if (vp56_rac_get_prob_branchy(c, p[0])) {
374 for (i = 0; i < 3; i++)
375 x += vp56_rac_get_prob(c, p[9 + i]) << i;
/* high bits are coded top-down */
376 for (i = 9; i > 3; i--)
377 x += vp56_rac_get_prob(c, p[9 + i]) << i;
378 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
382 const uint8_t *ps = p+2;
383 bit = vp56_rac_get_prob(c, *ps);
386 bit = vp56_rac_get_prob(c, *ps);
389 x += vp56_rac_get_prob(c, *ps);
/* sign bit is only read for nonzero magnitudes */
392 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
395 static av_always_inline
396 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
399 return vp8_submv_prob[4-!!left];
401 return vp8_submv_prob[2];
402 return vp8_submv_prob[1-!!left];
406 * Split motion vector prediction, 16.4.
407 * @returns the number of motion vectors parsed (2, 4 or 16)
/* NOTE(review): interior lines (declarations of part_idx/n/num/k, the k
 * assignment from firstidx, edge tests and else branches) are missing
 * from this extraction. */
409 static av_always_inline
410 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
/* neighbour MBs: +2 is the row above, -1 the MB to the left */
414 VP8Macroblock *top_mb = &mb[2];
415 VP8Macroblock *left_mb = &mb[-1];
416 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
417 *mbsplits_top = vp8_mbsplits[top_mb->partitioning],
418 *mbsplits_cur, *firstidx;
419 VP56mv *top_mv = top_mb->bmv;
420 VP56mv *left_mv = left_mb->bmv;
421 VP56mv *cur_mv = mb->bmv;
/* partition type is coded as a small tree: 16x8/8x16 vs 8x8 vs 4x4 */
423 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
424 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
425 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
427 part_idx = VP8_SPLITMVMODE_8x8;
430 part_idx = VP8_SPLITMVMODE_4x4;
433 num = vp8_mbsplit_count[part_idx];
434 mbsplits_cur = vp8_mbsplits[part_idx],
435 firstidx = vp8_mbfirstidx[part_idx];
436 mb->partitioning = part_idx;
438 for (n = 0; n < num; n++) {
440 uint32_t left, above;
441 const uint8_t *submv_prob;
/* neighbour sub-MVs come from the adjacent MB on block-edge positions,
 * from already-decoded sub-MVs of this MB otherwise */
444 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
446 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
448 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
450 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
452 submv_prob = get_submv_prob(left, above);
/* sub-MV mode tree: NEW4x4 / ZERO4x4 / TOP4x4 / LEFT4x4 */
454 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
455 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
456 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
457 mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
458 mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
460 AV_ZERO32(&mb->bmv[n]);
463 AV_WN32A(&mb->bmv[n], above);
466 AV_WN32A(&mb->bmv[n], left);
/* Inter MB motion vector decoding (spec section 16): build the
 * nearest/near/zero candidate list from the top/left/top-left neighbours,
 * then read the MV mode and the vector itself.
 * NOTE(review): interior lines (the mb-1 "left" entry of mb_edge, the
 * MV_EDGE_CHECK invocations, idx declaration, else branches and braces)
 * are missing from this extraction. */
473 static av_always_inline
474 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
476 VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
478 mb + 1 /* top-left */ };
479 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
480 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
482 int cur_sign_bias = s->sign_bias[mb->ref_frame];
483 int8_t *sign_bias = s->sign_bias;
485 uint8_t cnt[4] = { 0 };
486 VP56RangeCoder *c = &s->c;
488 AV_ZERO32(&near_mv[0]);
489 AV_ZERO32(&near_mv[1]);
491 /* Process MB on top, left and top-left */
492 #define MV_EDGE_CHECK(n)\
494 VP8Macroblock *edge = mb_edge[n];\
495 int edge_ref = edge->ref_frame;\
496 if (edge_ref != VP56_FRAME_CURRENT) {\
497 uint32_t mv = AV_RN32A(&edge->mv);\
499 if (cur_sign_bias != sign_bias[edge_ref]) {\
500 /* SWAR negate of the values in mv. */\
502 mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
504 if (!n || mv != AV_RN32A(&near_mv[idx]))\
505 AV_WN32A(&near_mv[++idx], mv);\
506 cnt[idx] += 1 + (n != 2);\
508 cnt[CNT_ZERO] += 1 + (n != 2);\
516 mb->partitioning = VP8_SPLITMVMODE_NONE;
517 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
518 mb->mode = VP8_MVMODE_MV;
520 /* If we have three distinct MVs, merge first and last if they're the same */
521 if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
522 cnt[CNT_NEAREST] += 1;
524 /* Swap near and nearest if necessary */
525 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
526 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
527 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
530 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
531 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
533 /* Choose the best mv out of 0,0 and the nearest mv */
534 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
/* repurpose cnt[CNT_SPLITMV] as the split-mode context counter */
535 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
536 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
537 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
539 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
540 mb->mode = VP8_MVMODE_SPLIT;
/* the MB-level mv is the last sub-MV decoded */
541 mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
543 mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
544 mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
548 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
552 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
556 mb->mode = VP8_MVMODE_ZERO;
/* Decode the 16 4x4 intra prediction sub-modes of one macroblock.
 * Keyframes use context (top/left neighbour modes); inter frames use a
 * single fixed probability table.
 * NOTE(review): braces, loop-variable declarations and the else branch
 * header are missing from this extraction. */
562 static av_always_inline
563 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
564 int mb_x, int keyframe)
566 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
569 uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x;
570 uint8_t* const left = s->intra4x4_pred_mode_left;
571 for (y = 0; y < 4; y++) {
572 for (x = 0; x < 4; x++) {
/* context = (mode above, mode to the left) */
574 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
575 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
576 left[y] = top[x] = *intra4x4;
582 for (i = 0; i < 16; i++)
583 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
/* Decode per-macroblock mode info: segment id, skip flag, intra/inter
 * decision, prediction modes and (for inter MBs) reference frame and MVs.
 * NOTE(review): braces and some branch headers (the keyframe test before
 * the intra path) are missing from this extraction. */
587 static av_always_inline
588 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment, uint8_t *ref)
590 VP56RangeCoder *c = &s->c;
592 if (s->segmentation.update_map)
593 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
/* otherwise reuse the segment from the reference map (if provided) */
595 *segment = ref ? *ref : *segment;
596 s->segment = *segment;
598 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
601 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
603 if (mb->mode == MODE_I4x4) {
604 decode_intra4x4_modes(s, c, mb_x, 1);
/* fill the 4x4 mode caches with the single 16x16 mode */
606 const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
607 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
608 AV_WN32A(s->intra4x4_pred_mode_left, modes);
611 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
612 mb->ref_frame = VP56_FRAME_CURRENT;
613 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
/* inter MB: pick the reference frame (previous / golden / altref) */
615 if (vp56_rac_get_prob_branchy(c, s->prob->last))
616 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
617 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
619 mb->ref_frame = VP56_FRAME_PREVIOUS;
620 s->ref_count[mb->ref_frame-1]++;
622 // motion vectors, 16.3
623 decode_mvs(s, mb, mb_x, mb_y);
/* intra MB on an inter frame: modes use the adaptive probabilities */
626 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
628 if (mb->mode == MODE_I4x4)
629 decode_intra4x4_modes(s, c, mb_x, 0);
631 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
632 mb->ref_frame = VP56_FRAME_CURRENT;
633 mb->partitioning = VP8_SPLITMVMODE_NONE;
634 AV_ZERO32(&mb->bmv[0]);
638 #ifndef decode_block_coeffs_internal
640 * @param c arithmetic bitstream reader context
641 * @param block destination for block coefficients
642 * @param probs probabilities to use when reading trees from the bitstream
643 * @param i initial coeff index, 0 unless a separate DC block is coded
644 * @param qmul array holding the dc/ac dequant factor at position 0/1
645 * @return 0 if no coeffs were decoded
646 * otherwise, the index of the last coeff decoded plus one
/* NOTE(review): interior lines (the coeff declaration, the do/while loop
 * framing, several else/goto lines and braces) are missing from this
 * extraction; the #ifndef allows an arch-optimized override. */
648 static int decode_block_coeffs_internal(VP56RangeCoder *c, DCTELEM block[16],
649 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
650 int i, uint8_t *token_prob, int16_t qmul[2])
655 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
659 if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
661 return i; // invalid input; blocks should end with EOB
/* context 0 = previous token was zero */
662 token_prob = probs[i][0];
666 if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
668 token_prob = probs[i+1][1];
670 if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
671 coeff = vp56_rac_get_prob_branchy(c, token_prob[4]);
673 coeff += vp56_rac_get_prob(c, token_prob[5]);
677 if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
678 if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
679 coeff = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
682 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
683 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
685 } else { // DCT_CAT3 and up
686 int a = vp56_rac_get_prob(c, token_prob[8]);
687 int b = vp56_rac_get_prob(c, token_prob[9+a]);
688 int cat = (a<<1) + b;
689 coeff = 3 + (8<<cat);
690 coeff += vp8_rac_get_coeff(c, ff_vp8_dct_cat_prob[cat]);
/* context 2 = previous token was larger than one */
693 token_prob = probs[i+1][2];
/* sign bit, then dequantize: qmul[0] for DC (i==0), qmul[1] for AC */
695 block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
703 * @param c arithmetic bitstream reader context
704 * @param block destination for block coefficients
705 * @param probs probabilities to use when reading trees from the bitstream
706 * @param i initial coeff index, 0 unless a separate DC block is coded
707 * @param zero_nhood the initial prediction context for number of surrounding
708 * all-zero blocks (only left/top, so 0-2)
709 * @param qmul array holding the dc/ac dequant factor at position 0/1
710 * @return 0 if no coeffs were decoded
711 * otherwise, the index of the last coeff decoded plus one
713 static av_always_inline
714 int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
715 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
716 int i, int zero_nhood, int16_t qmul[2])
718 uint8_t *token_prob = probs[i][zero_nhood];
719 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
721 return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
/* Decode all DCT coefficients of one macroblock: optional separate luma DC
 * (Y2/WHT) block, 16 luma 4x4 blocks, then 8 chroma 4x4 blocks; maintains
 * the top/left nonzero-count contexts used for coefficient prediction.
 * NOTE(review): braces, the block_dc declaration and several surrounding
 * lines are missing from this extraction. */
724 static av_always_inline
725 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
726 uint8_t t_nnz[9], uint8_t l_nnz[9])
728 int i, x, y, luma_start = 0, luma_ctx = 3;
729 int nnz_pred, nnz, nnz_total = 0;
730 int segment = s->segment;
/* all modes except I4x4 and SPLIT code luma DC in a separate WHT block */
733 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
734 nnz_pred = t_nnz[8] + l_nnz[8];
736 // decode DC values and do hadamard
737 nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
738 s->qmat[segment].luma_dc_qmul);
739 l_nnz[8] = t_nnz[8] = !!nnz;
744 s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
746 s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
/* luma blocks: when DC was coded separately, luma_start/luma_ctx were
 * adjusted so AC decoding starts at coefficient 1 */
753 for (y = 0; y < 4; y++)
754 for (x = 0; x < 4; x++) {
755 nnz_pred = l_nnz[y] + t_nnz[x];
756 nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
757 nnz_pred, s->qmat[segment].luma_qmul);
758 // nnz+block_dc may be one more than the actual last index, but we don't care
759 s->non_zero_count_cache[y][x] = nnz + block_dc;
760 t_nnz[x] = l_nnz[y] = !!nnz;
765 // TODO: what to do about dimensions? 2nd dim for luma is x,
766 // but for chroma it's (y<<1)|x
767 for (i = 4; i < 6; i++)
768 for (y = 0; y < 2; y++)
769 for (x = 0; x < 2; x++) {
770 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
771 nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
772 nnz_pred, s->qmat[segment].chroma_qmul);
773 s->non_zero_count_cache[i][(y<<1)+x] = nnz;
774 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
778 // if there were no coded coeffs despite the macroblock not being marked skip,
779 // we MUST not do the inner loop filter and should not do IDCT
780 // Since skip isn't used for bitstream prediction, just manually set it.
785 static av_always_inline
786 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
787 int linesize, int uvlinesize, int simple)
789 AV_COPY128(top_border, src_y + 15*linesize);
791 AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
792 AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
/* Swap (or copy) the pixels above the current macroblock with the saved
 * top_border row, so intra prediction sees the pre-loop-filter values.
 * xchg=1 swaps in before prediction, xchg=0 restores afterwards.
 * NOTE(review): some lines (src_y adjustment, #define closer, braces)
 * are missing from this extraction. */
796 static av_always_inline
797 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
798 int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
799 int simple, int xchg)
801 uint8_t *top_border_m1 = top_border-32; // for TL prediction
803 src_cb -= uvlinesize;
804 src_cr -= uvlinesize;
806 #define XCHG(a,b,xchg) do { \
807 if (xchg) AV_SWAP64(b,a); \
808 else AV_COPY64(b,a); \
811 XCHG(top_border_m1+8, src_y-8, xchg);
812 XCHG(top_border, src_y, xchg);
/* always copy (xchg=1) for spans that the filter never modifies */
813 XCHG(top_border+8, src_y+8, 1);
814 if (mb_x < mb_width-1)
815 XCHG(top_border+32, src_y+16, 1);
817 // only copy chroma for normal loop filter
818 // or to initialize the top row to 127
819 if (!simple || !mb_y) {
820 XCHG(top_border_m1+16, src_cb-8, xchg);
821 XCHG(top_border_m1+24, src_cr-8, xchg);
822 XCHG(top_border+16, src_cb, 1);
823 XCHG(top_border+24, src_cr, 1);
827 static av_always_inline
828 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
831 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
833 return mb_y ? mode : LEFT_DC_PRED8x8;
837 static av_always_inline
838 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
841 return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
843 return mb_y ? mode : HOR_PRED8x8;
847 static av_always_inline
848 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
850 if (mode == DC_PRED8x8) {
851 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
857 static av_always_inline
858 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
862 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
864 return !mb_y ? DC_127_PRED8x8 : mode;
866 return !mb_x ? DC_129_PRED8x8 : mode;
867 case PLANE_PRED8x8 /*TM*/:
868 return check_tm_pred8x8_mode(mode, mb_x, mb_y);
873 static av_always_inline
874 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
877 return mb_y ? VERT_VP8_PRED : DC_129_PRED;
879 return mb_y ? mode : HOR_VP8_PRED;
/* Edge-mode fixup for 4x4 intra prediction under CODEC_FLAG_EMU_EDGE;
 * sets *copy_buf when the prediction must run on a copied stack buffer.
 * NOTE(review): the switch framing, several case labels and the *copy_buf
 * assignments are missing from this extraction. */
883 static av_always_inline
884 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
893 case DIAG_DOWN_LEFT_PRED:
895 return !mb_y ? DC_127_PRED : mode;
903 return !mb_x ? DC_129_PRED : mode;
905 return check_tm_pred4x4_mode(mode, mb_x, mb_y);
906 case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
907 case DIAG_DOWN_RIGHT_PRED:
908 case VERT_RIGHT_PRED:
/* Perform intra prediction for one macroblock: 16x16 luma (or 16 separate
 * 4x4 sub-blocks with per-block IDCT for MODE_I4x4) plus 8x8 chroma,
 * including all frame-edge fixups and top-border pixel swapping.
 * NOTE(review): braces, some declarations (intra4x4 indexing, copy-buffer
 * setup) and else branches are missing from this extraction. */
917 static av_always_inline
918 void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
921 AVCodecContext *avctx = s->avctx;
922 int x, y, mode, nnz, tr;
924 // for the first row, we need to run xchg_mb_border to init the top edge to 127
925 // otherwise, skip it if we aren't going to deblock
926 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
927 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
928 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
929 s->filter.simple, 1);
931 if (mb->mode < MODE_I4x4) {
932 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
933 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
935 mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
937 s->hpc.pred16x16[mode](dst[0], s->linesize);
/* MODE_I4x4: predict + reconstruct each 4x4 sub-block in turn */
939 uint8_t *ptr = dst[0];
940 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
941 uint8_t tr_top[4] = { 127, 127, 127, 127 };
943 // all blocks on the right edge of the macroblock use bottom edge
944 // the top macroblock for their topright edge
945 uint8_t *tr_right = ptr - s->linesize + 16;
947 // if we're on the right edge of the frame, said edge is extended
948 // from the top macroblock
949 if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
950 mb_x == s->mb_width-1) {
951 tr = tr_right[-1]*0x01010101;
952 tr_right = (uint8_t *)&tr;
956 AV_ZERO128(s->non_zero_count_cache);
958 for (y = 0; y < 4; y++) {
959 uint8_t *topright = ptr + 4 - s->linesize;
960 for (x = 0; x < 4; x++) {
961 int copy = 0, linesize = s->linesize;
962 uint8_t *dst = ptr+4*x;
963 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
965 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
970 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
971 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
/* copy path: gather the needed border pixels into copy_dst and
 * predict there, since the real border is unavailable */
977 AV_WN32A(copy_dst+4, 127U * 0x01010101U);
979 AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
983 copy_dst[3] = ptr[4*x-s->linesize-1];
992 copy_dst[11] = ptr[4*x -1];
993 copy_dst[19] = ptr[4*x+s->linesize -1];
994 copy_dst[27] = ptr[4*x+s->linesize*2-1];
995 copy_dst[35] = ptr[4*x+s->linesize*3-1];
1001 s->hpc.pred4x4[mode](dst, topright, linesize);
/* copy the predicted 4x4 block back into the picture */
1003 AV_COPY32(ptr+4*x , copy_dst+12);
1004 AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
1005 AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
1006 AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
/* add the residual: DC-only shortcut when only one coeff is set */
1009 nnz = s->non_zero_count_cache[y][x];
1012 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
1014 s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
1019 ptr += 4*s->linesize;
/* chroma: same mode is applied to both Cb and Cr planes */
1024 if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1025 mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode, mb_x, mb_y);
1027 mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x, mb_y);
1029 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1030 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1032 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
1033 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1034 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1035 s->filter.simple, 0);
1038 static const uint8_t subpel_idx[3][8] = {
1039 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1040 // also function pointer index
1041 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1042 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1048 * @param s VP8 decoding context
1049 * @param dst target buffer for block data at block position
1050 * @param ref reference picture buffer at origin (0, 0)
1051 * @param mv motion vector (relative to block position) to get pixel data from
1052 * @param x_off horizontal position of block from origin (0, 0)
1053 * @param y_off vertical position of block from origin (0, 0)
1054 * @param block_w width of block (16, 8 or 4)
1055 * @param block_h height of block (always same as block_w)
1056 * @param width width of src/dst plane data
1057 * @param height height of src/dst plane data
1058 * @param linesize size of a single line of plane data, including padding
1059 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
/* NOTE(review): some lines (the mv==0 test selecting the copy path,
 * braces) are missing from this extraction. */
1061 static av_always_inline
1062 void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv,
1063 int x_off, int y_off, int block_w, int block_h,
1064 int width, int height, int linesize,
1065 vp8_mc_func mc_func[3][3])
1067 uint8_t *src = ref->data[0];
/* luma MVs are in quarter-pel; <<1 converts to the eighth-pel phase table */
1071 int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1072 int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
1074 x_off += mv->x >> 2;
1075 y_off += mv->y >> 2;
/* wait until the reference row we read from has been decoded */
1078 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1079 src += y_off * linesize + x_off;
/* fall back to the edge emulation buffer when the filter would read
 * outside the picture */
1080 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1081 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1082 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
1083 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1084 x_off - mx_idx, y_off - my_idx, width, height);
1085 src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1087 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
/* full-pel MV: plain copy */
1089 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1090 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
1095 * chroma MC function
1097 * @param s VP8 decoding context
1098 * @param dst1 target buffer for block data at block position (U plane)
1099 * @param dst2 target buffer for block data at block position (V plane)
1100 * @param ref reference picture buffer at origin (0, 0)
1101 * @param mv motion vector (relative to block position) to get pixel data from
1102 * @param x_off horizontal position of block from origin (0, 0)
1103 * @param y_off vertical position of block from origin (0, 0)
1104 * @param block_w width of block (16, 8 or 4)
1105 * @param block_h height of block (always same as block_w)
1106 * @param width width of src/dst plane data
1107 * @param height height of src/dst plane data
1108 * @param linesize size of a single line of plane data, including padding
1109 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
/* NOTE(review): some lines (the mv==0 test and braces) are missing from
 * this extraction. */
1111 static av_always_inline
1112 void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref,
1113 const VP56mv *mv, int x_off, int y_off,
1114 int block_w, int block_h, int width, int height, int linesize,
1115 vp8_mc_func mc_func[3][3])
1117 uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
/* chroma MVs are in eighth-pel units already */
1120 int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1121 int my = mv->y&7, my_idx = subpel_idx[0][my];
1123 x_off += mv->x >> 3;
1124 y_off += mv->y >> 3;
1127 src1 += y_off * linesize + x_off;
1128 src2 += y_off * linesize + x_off;
1129 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
/* edge emulation is done separately for each chroma plane */
1130 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1131 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1132 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
1133 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1134 x_off - mx_idx, y_off - my_idx, width, height);
1135 src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1136 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1138 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
1139 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1140 x_off - mx_idx, y_off - my_idx, width, height);
1141 src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1142 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1144 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1145 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
/* full-pel MV: plain copy for both planes */
1148 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1149 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1150 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
/* Motion-compensate one partition of a macroblock: luma at full resolution,
 * then the corresponding chroma area at half resolution.  x_off/y_off place
 * the macroblock in the frame; bx_off/by_off place the sub-block inside it. */
1154 static av_always_inline
1155 void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
1156 AVFrame *ref_frame, int x_off, int y_off,
1157 int bx_off, int by_off,
1158 int block_w, int block_h,
1159 int width, int height, VP56mv *mv)
/* Y: pixel-tab index selects the 8-wide vs 16-wide put functions. */
1164 vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
1165 ref_frame, mv, x_off + bx_off, y_off + by_off,
1166 block_w, block_h, width, height, s->linesize,
1167 s->put_pixels_tab[block_w == 8]);
/* U/V: NOTE(review): uvmv (used below) is declared and derived from *mv in
 * lines elided from this excerpt; the profile-3 body (also elided)
 * presumably rounds it to full-pel — confirm against the full file. */
1170 if (s->profile == 3) {
1174 x_off >>= 1; y_off >>= 1;
1175 bx_off >>= 1; by_off >>= 1;
1176 width >>= 1; height >>= 1;
1177 block_w >>= 1; block_h >>= 1;
1178 vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
1179 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1180 &uvmv, x_off + bx_off, y_off + by_off,
1181 block_w, block_h, width, height, s->uvlinesize,
1182 s->put_pixels_tab[1 + (block_w == 4)]);
1185 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1186 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1187 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1189 /* Don't prefetch refs that haven't been used very often this frame. */
1190 if (s->ref_count[ref-1] > (mb_xy >> 5)) {
1191 int x_off = mb_x << 4, y_off = mb_y << 4;
/* Estimate the source address MC will read for a MB a few columns ahead,
 * assuming it uses a mv similar to this MB's; the +8/+64 offsets bias the
 * address per the 64-byte-cache-line tuning noted above. */
1192 int mx = (mb->mv.x>>2) + x_off + 8;
1193 int my = (mb->mv.y>>2) + y_off;
1194 uint8_t **src= s->framep[ref]->data;
1195 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1196 /* For threading, a ff_thread_await_progress here might be useful, but
1197 * it actually slows down the decoder. Since a bad prefetch doesn't
1198 * generate bad decoder output, we don't run it here. */
1199 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* Chroma at half resolution; using src[2]-src[1] as the stride touches the
 * U and V planes with a single prefetch call. */
1200 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1201 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1206 * Apply motion vectors to prediction buffer, chapter 18.
1208 static av_always_inline
1209 void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
1212 int x_off = mb_x << 4, y_off = mb_y << 4;
1213 int width = 16*s->mb_width, height = 16*s->mb_height;
1214 AVFrame *ref = s->framep[mb->ref_frame];
1215 VP56mv *bmv = mb->bmv;
/* Dispatch on the macroblock partitioning chosen by the mode decoder. */
1217 switch (mb->partitioning) {
1218 case VP8_SPLITMVMODE_NONE:
/* One mv for the whole 16x16 macroblock. */
1219 vp8_mc_part(s, dst, ref, x_off, y_off,
1220 0, 0, 16, 16, width, height, &mb->mv);
1222 case VP8_SPLITMVMODE_4x4: {
/* 16 independent 4x4 luma blocks, one mv each (mb->bmv[0..15]). */
1227 for (y = 0; y < 4; y++) {
1228 for (x = 0; x < 4; x++) {
1229 vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
1231 4*x + x_off, 4*y + y_off, 4, 4,
1232 width, height, s->linesize,
1233 s->put_pixels_tab[2]);
/* Chroma at half resolution: each 4x4 chroma block takes the rounded
 * average of the four luma mvs covering the co-located 8x8 luma area. */
1238 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1239 for (y = 0; y < 2; y++) {
1240 for (x = 0; x < 2; x++) {
1241 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1242 mb->bmv[ 2*y * 4 + 2*x+1].x +
1243 mb->bmv[(2*y+1) * 4 + 2*x ].x +
1244 mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1245 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1246 mb->bmv[ 2*y * 4 + 2*x+1].y +
1247 mb->bmv[(2*y+1) * 4 + 2*x ].y +
1248 mb->bmv[(2*y+1) * 4 + 2*x+1].y;
/* (sum + 2 + sign) >> 2: divide by 4 with rounding; the >>(INT_BIT-1)
 * term (-1 for negative sums, 0 otherwise) keeps rounding symmetric. */
1249 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1250 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
1251 if (s->profile == 3) {
1255 vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
1256 dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1257 4*x + x_off, 4*y + y_off, 4, 4,
1258 width, height, s->uvlinesize,
1259 s->put_pixels_tab[2]);
/* Two horizontal halves, one mv each. */
1264 case VP8_SPLITMVMODE_16x8:
1265 vp8_mc_part(s, dst, ref, x_off, y_off,
1266 0, 0, 16, 8, width, height, &bmv[0]);
1267 vp8_mc_part(s, dst, ref, x_off, y_off,
1268 0, 8, 16, 8, width, height, &bmv[1]);
/* Two vertical halves, one mv each. */
1270 case VP8_SPLITMVMODE_8x16:
1271 vp8_mc_part(s, dst, ref, x_off, y_off,
1272 0, 0, 8, 16, width, height, &bmv[0]);
1273 vp8_mc_part(s, dst, ref, x_off, y_off,
1274 8, 0, 8, 16, width, height, &bmv[1]);
/* Four 8x8 quadrants, one mv each. */
1276 case VP8_SPLITMVMODE_8x8:
1277 vp8_mc_part(s, dst, ref, x_off, y_off,
1278 0, 0, 8, 8, width, height, &bmv[0]);
1279 vp8_mc_part(s, dst, ref, x_off, y_off,
1280 8, 0, 8, 8, width, height, &bmv[1]);
1281 vp8_mc_part(s, dst, ref, x_off, y_off,
1282 0, 8, 8, 8, width, height, &bmv[2]);
1283 vp8_mc_part(s, dst, ref, x_off, y_off,
1284 8, 8, 8, 8, width, height, &bmv[3]);
/* Add the dequantized, inverse-transformed residual of one macroblock onto
 * the prediction already sitting in dst (luma plane, then both chroma). */
1289 static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
1293 if (mb->mode != MODE_I4x4) {
1294 uint8_t *y_dst = dst[0];
1295 for (y = 0; y < 4; y++) {
/* nnz4 packs the non-zero-coefficient counts of this row's four 4x4
 * blocks, one per byte (little-endian load, LSB = leftmost block). */
1296 uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
/* Some block in the row has AC coefficients: per-block full idct, or
 * cheaper DC-only add when the block has exactly one (DC) coefficient. */
1298 if (nnz4&~0x01010101) {
1299 for (x = 0; x < 4; x++) {
1300 if ((uint8_t)nnz4 == 1)
1301 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
1302 else if((uint8_t)nnz4 > 1)
1303 s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
/* Every block in the row is DC-only: combined 4-block DC add. */
1309 s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
1312 y_dst += 4*s->linesize;
/* Chroma: same scheme per plane, 2x2 blocks of 4x4 each. */
1316 for (ch = 0; ch < 2; ch++) {
1317 uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
1319 uint8_t *ch_dst = dst[1+ch];
1320 if (nnz4&~0x01010101) {
1321 for (y = 0; y < 2; y++) {
1322 for (x = 0; x < 2; x++) {
1323 if ((uint8_t)nnz4 == 1)
1324 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
1325 else if((uint8_t)nnz4 > 1)
1326 s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
1329 goto chroma_idct_end;
1331 ch_dst += 4*s->uvlinesize;
/* All four chroma blocks DC-only: combined add (label chroma_idct_end is
 * in lines elided from this excerpt). */
1334 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
/* Compute per-macroblock loop-filter strength: base level from segmentation
 * (absolute or delta on the frame level) adjusted by per-reference and
 * per-mode loop-filter deltas, then clipped; also derive the interior edge
 * limit from the sharpness setting.  Results are stored into *f for use by
 * filter_mb()/filter_mb_simple(). */
1341 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
1343 int interior_limit, filter_level;
1345 if (s->segmentation.enabled) {
1346 filter_level = s->segmentation.filter_level[s->segment];
/* Segment value is a delta on the frame level unless absolute_vals. */
1347 if (!s->segmentation.absolute_vals)
1348 filter_level += s->filter.level;
1350 filter_level = s->filter.level;
1352 if (s->lf_delta.enabled) {
1353 filter_level += s->lf_delta.ref[mb->ref_frame];
1354 filter_level += s->lf_delta.mode[mb->mode];
/* Clip to the 6-bit range [0, 63] used by the filter. */
1357 filter_level = av_clip_uintp2(filter_level, 6);
1359 interior_limit = filter_level;
/* Higher sharpness shrinks the interior limit (but never below 1). */
1360 if (s->filter.sharpness) {
1361 interior_limit >>= (s->filter.sharpness + 3) >> 2;
1362 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1364 interior_limit = FFMAX(interior_limit, 1);
1366 f->filter_level = filter_level;
1367 f->inner_limit = interior_limit;
/* Interior (non-MB-edge) edges are filtered unless the MB was skipped with
 * a whole-block mode (no residual, single mv). */
1368 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
/* Apply the normal (non-simple) in-loop deblocking filter to one macroblock:
 * macroblock-boundary edges use the stronger mbedge limit, the three
 * interior 4-pixel edges use the weaker bedge limit.
 * NOTE(review): the if (mb_x) / if (inner_filter) / if (mb_y) guards that
 * surround the filter-call groups are elided from this excerpt. */
1371 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
1373 int mbedge_lim, bedge_lim, hev_thresh;
1374 int filter_level = f->filter_level;
1375 int inner_limit = f->inner_limit;
1376 int inner_filter = f->inner_filter;
1377 int linesize = s->linesize;
1378 int uvlinesize = s->uvlinesize;
/* High-edge-variance threshold as a function of filter level, indexed by
 * s->keyframe (row 0 = inter frames, row 1 = key frames). */
1379 static const uint8_t hev_thresh_lut[2][64] = {
1380 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1381 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1382 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1384 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1385 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1386 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* Edge limits derived from the per-MB level/limit (VP8 spec, chapter 15). */
1393 bedge_lim = 2*filter_level + inner_limit;
1394 mbedge_lim = bedge_lim + 4;
1396 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
/* Left macroblock edge (horizontal filtering of the vertical edge). */
1399 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
1400 mbedge_lim, inner_limit, hev_thresh);
1401 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
1402 mbedge_lim, inner_limit, hev_thresh);
/* Interior vertical edges at x = 4, 8, 12 (luma) and x = 4 (chroma). */
1406 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
1407 inner_limit, hev_thresh);
1408 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
1409 inner_limit, hev_thresh);
1410 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
1411 inner_limit, hev_thresh);
1412 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
1413 uvlinesize, bedge_lim,
1414 inner_limit, hev_thresh);
/* Top macroblock edge (vertical filtering of the horizontal edge). */
1418 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
1419 mbedge_lim, inner_limit, hev_thresh);
1420 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
1421 mbedge_lim, inner_limit, hev_thresh);
/* Interior horizontal edges at y = 4, 8, 12 (luma) and y = 4 (chroma). */
1425 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
1426 linesize, bedge_lim,
1427 inner_limit, hev_thresh);
1428 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
1429 linesize, bedge_lim,
1430 inner_limit, hev_thresh);
1431 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
1432 linesize, bedge_lim,
1433 inner_limit, hev_thresh);
1434 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
1435 dst[2] + 4 * uvlinesize,
1436 uvlinesize, bedge_lim,
1437 inner_limit, hev_thresh);
/* Apply the "simple" in-loop filter (luma only, no hev/inner-limit logic in
 * the DSP calls) to one macroblock.
 * NOTE(review): the early-return on filter_level == 0 and the
 * if (mb_x) / if (inner_filter) / if (mb_y) guards are elided from this
 * excerpt. */
1441 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
1443 int mbedge_lim, bedge_lim;
1444 int filter_level = f->filter_level;
1445 int inner_limit = f->inner_limit;
1446 int inner_filter = f->inner_filter;
1447 int linesize = s->linesize;
/* MB-boundary edges get a limit 4 higher than interior edges. */
1452 bedge_lim = 2*filter_level + inner_limit;
1453 mbedge_lim = bedge_lim + 4;
/* Left MB edge, then interior vertical edges at x = 4, 8, 12. */
1456 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
1458 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1459 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1460 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
/* Top MB edge, then interior horizontal edges at y = 4, 8, 12. */
1464 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
1466 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1467 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1468 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
/* Run the normal loop filter over one completed macroblock row, saving each
 * MB's bottom border first (backup_mb_border) for intra prediction of the
 * row below.  NOTE(review): the dst[3] array initializer braces are elided
 * from this excerpt. */
1472 static void filter_mb_row(VP8Context *s, AVFrame *curframe, int mb_y)
1474 VP8FilterStrength *f = s->filter_strength;
1476 curframe->data[0] + 16*mb_y*s->linesize,
1477 curframe->data[1] + 8*mb_y*s->uvlinesize,
1478 curframe->data[2] + 8*mb_y*s->uvlinesize
/* f was filled per-MB by filter_level_for_mb during decode of this row. */
1482 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
1483 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1484 filter_mb(s, dst, f++, mb_x, mb_y);
/* Same as filter_mb_row() but for the simple filter, which touches only the
 * luma plane. */
1491 static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y)
1493 VP8FilterStrength *f = s->filter_strength;
1494 uint8_t *dst = curframe->data[0] + 16*mb_y*s->linesize;
1497 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
1498 backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
1499 filter_mb_simple(s, dst, f++, mb_x, mb_y);
/* Decode one VP8 frame from avpkt into an AVFrame: parses the header,
 * rotates the last/golden/altref reference slots, then decodes and
 * loop-filters the frame one macroblock row at a time, reporting per-row
 * progress for frame threading. */
1504 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
1507 VP8Context *s = avctx->priv_data;
1508 int ret, mb_x, mb_y, i, y, referenced;
1509 enum AVDiscard skip_thresh;
1510 AVFrame *av_uninit(curframe), *prev_frame = s->framep[VP56_FRAME_CURRENT];
1512 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
/* The frame is "referenced" if any reference slot will point at it after
 * this call; unreferenced frames may be skipped under AVDISCARD_NONREF. */
1515 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
1516 || s->update_altref == VP56_FRAME_CURRENT;
1518 skip_thresh = !referenced ? AVDISCARD_NONREF :
1519 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
1521 if (avctx->skip_frame >= skip_thresh) {
1525 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
1527 // release no longer referenced frames
1528 for (i = 0; i < 5; i++)
1529 if (s->frames[i].data[0] &&
1530 &s->frames[i] != prev_frame &&
1531 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1532 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1533 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1534 ff_thread_release_buffer(avctx, &s->frames[i]);
1536 // find a free buffer
1537 for (i = 0; i < 5; i++)
1538 if (&s->frames[i] != prev_frame &&
1539 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1540 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1541 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1542 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
/* 5 frames, at most 4 in use (cur/prev/golden/altref), so this should
 * never trigger. */
1546 av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
1549 if (curframe->data[0])
1550 ff_thread_release_buffer(avctx, curframe);
/* Frame metadata must be set before get_buffer. */
1552 curframe->key_frame = s->keyframe;
1553 curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1554 curframe->reference = referenced ? 3 : 0;
/* Export the segmentation map via ref_index[0] so the next frame's
 * decode_mb_mode() (see below) can read prev_frame->ref_index[0]. */
1555 curframe->ref_index[0] = s->segmentation_map;
1556 if ((ret = ff_thread_get_buffer(avctx, curframe))) {
1557 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
/* Compute the post-decode reference assignment in next_framep[] first, so
 * golden/altref swaps read the pre-update framep[] values consistently. */
1561 // check if golden and altref are swapped
1562 if (s->update_altref != VP56_FRAME_NONE) {
1563 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
1565 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
1567 if (s->update_golden != VP56_FRAME_NONE) {
1568 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
1570 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
1572 if (s->update_last) {
1573 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
1575 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
1577 s->next_framep[VP56_FRAME_CURRENT] = curframe;
/* Per-frame setup is done; from here on other frame threads may proceed. */
1579 ff_thread_finish_setup(avctx);
1581 // Given that arithmetic probabilities are updated every frame, it's quite likely
1582 // that the values we have on a random interframe are complete junk if we didn't
1583 // start decode on a keyframe. So just don't display anything rather than junk.
1584 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1585 !s->framep[VP56_FRAME_GOLDEN] ||
1586 !s->framep[VP56_FRAME_GOLDEN2])) {
1587 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1588 return AVERROR_INVALIDDATA;
1591 s->linesize = curframe->linesize[0];
1592 s->uvlinesize = curframe->linesize[1];
/* Lazily allocated: 21 lines covers the max block height plus the sixtap
 * filter margins used by the emulated-edge path. */
1594 if (!s->edge_emu_buffer)
1595 s->edge_emu_buffer = av_malloc(21*s->linesize);
1597 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1599 /* Zero macroblock structures for top/top-left prediction from outside the frame. */
1600 memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
1602 // top edge of 127 for intra prediction
1603 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1604 s->top_border[0][15] = s->top_border[0][23] = 127;
1605 memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
1607 memset(s->ref_count, 0, sizeof(s->ref_count));
1609 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
/* mv clamp range in quarter-pel units ((mb-1)<<6 = (mb-1)*16px*4): mvs may
 * point up to 16 pixels outside the frame. */
1611 #define MARGIN (16 << 2)
1612 s->mv_min.y = -MARGIN;
1613 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1615 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
/* Coefficient partitions are assigned to MB rows round-robin. */
1616 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
1617 VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1618 int mb_xy = mb_y*s->mb_width;
1620 curframe->data[0] + 16*mb_y*s->linesize,
1621 curframe->data[1] + 8*mb_y*s->uvlinesize,
1622 curframe->data[2] + 8*mb_y*s->uvlinesize
1625 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
1626 memset(s->left_nnz, 0, sizeof(s->left_nnz));
1627 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1629 // left edge of 129 for intra prediction
1630 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1631 for (i = 0; i < 3; i++)
1632 for (y = 0; y < 16>>!!i; y++)
1633 dst[i][y*curframe->linesize[i]-1] = 129;
1634 if (mb_y == 1) // top left edge is also 129
1635 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
1638 s->mv_min.x = -MARGIN;
1639 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
/* If the segmentation map is reused from the previous frame, wait until
 * that frame has decoded the corresponding row. */
1640 if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
1641 ff_thread_await_progress(prev_frame, mb_y, 0);
1643 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1644 /* Prefetch the current frame, 4 MBs ahead */
1645 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1646 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1648 decode_mb_mode(s, mb, mb_x, mb_y, s->segmentation_map + mb_xy,
1649 prev_frame ? prev_frame->ref_index[0] + mb_xy : NULL);
1651 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
1654 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
1656 if (mb->mode <= MODE_I4x4)
1657 intra_predict(s, dst, mb, mb_x, mb_y);
1659 inter_predict(s, dst, mb, mb_x, mb_y);
1661 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
1664 idct_mb(s, dst, mb);
/* Skipped MB: clear the nnz context instead of running the idct. */
1666 AV_ZERO64(s->left_nnz);
1667 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
1669 // Reset DC block predictors if they would exist if the mb had coefficients
1670 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
1672 s->top_nnz[mb_x][8] = 0;
1676 if (s->deblock_filter)
1677 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);
1679 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
/* Filter the completed row before reporting its progress to consumers. */
1687 if (s->deblock_filter) {
1688 if (s->filter.simple)
1689 filter_mb_row_simple(s, curframe, mb_y);
1691 filter_mb_row(s, curframe, mb_y);
1696 ff_thread_report_progress(curframe, mb_y, 0);
1699 ff_thread_report_progress(curframe, INT_MAX, 0);
1701 // if future frames don't use the updated probabilities,
1702 // reset them to the values we saved
1703 if (!s->update_probabilities)
1704 s->prob[0] = s->prob[1];
/* Rotate the reference slots to the assignment computed above. */
1706 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
/* Invisible (altref-only) frames are decoded but not output. */
1708 if (!s->invisible) {
1709 *(AVFrame*)data = *curframe;
1710 *data_size = sizeof(AVFrame);
/* Codec init: fix the output pixel format and set up the DSP function
 * tables shared with the rest of libavcodec. */
1716 static av_cold int vp8_decode_init(AVCodecContext *avctx)
1718 VP8Context *s = avctx->priv_data;
/* VP8 output is always 8-bit 4:2:0. */
1721 avctx->pix_fmt = PIX_FMT_YUV420P;
/* Generic dsputil, H.264-style intra prediction (initialised for VP8's
 * mode set via CODEC_ID_VP8), and the VP8 idct/loop-filter/MC functions. */
1723 dsputil_init(&s->dsp, avctx);
1724 ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8);
1725 ff_vp8dsp_init(&s->vp8dsp);
/* Codec close: vp8_decode_flush releases all frame buffers and the
 * per-context allocations. */
1730 static av_cold int vp8_decode_free(AVCodecContext *avctx)
1732 vp8_decode_flush(avctx);
/* Frame-threading: initialise a per-thread copy of the context (function
 * body is elided from this excerpt). */
1736 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
1738 VP8Context *s = avctx->priv_data;
/**
 * Translate a frame pointer belonging to the source thread's context
 * (s_src, which declares frames[]) into the equivalent slot of the
 * destination context s; a NULL pointer stays NULL.  Both contexts must be
 * in scope where the macro is expanded.  The argument and the whole
 * expansion are parenthesized so the macro is safe inside larger
 * expressions.
 */
#define REBASE(pic) \
    ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
/* Frame-threading: copy inter-frame decoder state (probabilities,
 * segmentation, filter deltas, sign biases) and the reference-frame
 * pointers from the source thread's context, rebasing the pointers into
 * this context's own frames[] storage. */
1748 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
1750 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
/* prob[0] is the working set; take whichever set the source thread would
 * carry into the next frame (saved copy if it didn't update). */
1752 s->prob[0] = s_src->prob[!s_src->update_probabilities];
1753 s->segmentation = s_src->segmentation;
1754 s->lf_delta = s_src->lf_delta;
1755 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
1757 memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
/* next_framep holds the source's post-decode reference view; REBASE maps
 * each pointer onto this context's frames[] array (NULL stays NULL). */
1758 s->framep[0] = REBASE(s_src->next_framep[0]);
1759 s->framep[1] = REBASE(s_src->next_framep[1]);
1760 s->framep[2] = REBASE(s_src->next_framep[2]);
1761 s->framep[3] = REBASE(s_src->next_framep[3]);
1766 AVCodec ff_vp8_decoder = {
1775 CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
1776 .flush = vp8_decode_flush,
1777 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
1778 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
1779 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),