2 * VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Jason Garrett-Glaser
8 * This file is part of FFmpeg.
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "libavutil/imgutils.h"
29 #include "rectangle.h"
36 static void vp8_decode_flush(AVCodecContext *avctx)
38 VP8Context *s = avctx->priv_data;
41 if (!avctx->is_copy) {
42 for (i = 0; i < 5; i++)
43 if (s->frames[i].data[0])
44 ff_thread_release_buffer(avctx, &s->frames[i]);
46 memset(s->framep, 0, sizeof(s->framep));
48 av_freep(&s->macroblocks_base);
49 av_freep(&s->filter_strength);
50 av_freep(&s->intra4x4_pred_mode_top);
51 av_freep(&s->top_nnz);
52 av_freep(&s->edge_emu_buffer);
53 av_freep(&s->top_border);
54 av_freep(&s->segmentation_map);
56 s->macroblocks = NULL;
59 static int update_dimensions(VP8Context *s, int width, int height)
61 if (width != s->avctx->width ||
62 height != s->avctx->height) {
63 if (av_image_check_size(width, height, 0, s->avctx))
64 return AVERROR_INVALIDDATA;
66 vp8_decode_flush(s->avctx);
68 avcodec_set_dimensions(s->avctx, width, height);
71 s->mb_width = (s->avctx->coded_width +15) / 16;
72 s->mb_height = (s->avctx->coded_height+15) / 16;
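    /* The macroblock array is laid out so that rows overlap: for the MB being
     * decoded, mb[-1] is its left neighbour and mb[+2] is the MB above it
     * (see decode_mvs/decode_splitmvs), hence mb_width + mb_height*2 + 1
     * entries rather than a plain row. */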
74 s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
75 s->filter_strength = av_mallocz(s->mb_width*sizeof(*s->filter_strength));
76 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
77 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
78 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
79 s->segmentation_map = av_mallocz(s->mb_width*s->mb_height);
81 if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
82 !s->top_nnz || !s->top_border || !s->segmentation_map)
83 return AVERROR(ENOMEM);
85 s->macroblocks = s->macroblocks_base + 1;
90 static void parse_segment_info(VP8Context *s)
92 VP56RangeCoder *c = &s->c;
95 s->segmentation.update_map = vp8_rac_get(c);
97 if (vp8_rac_get(c)) { // update segment feature data
98 s->segmentation.absolute_vals = vp8_rac_get(c);
100 for (i = 0; i < 4; i++)
101 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
103 for (i = 0; i < 4; i++)
104 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
106 if (s->segmentation.update_map)
107 for (i = 0; i < 3; i++)
108 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
111 static void update_lf_deltas(VP8Context *s)
113 VP56RangeCoder *c = &s->c;
116 for (i = 0; i < 4; i++)
117 s->lf_delta.ref[i] = vp8_rac_get_sint(c, 6);
119 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++)
120 s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6);
123 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
125 const uint8_t *sizes = buf;
128 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
130 buf += 3*(s->num_coeff_partitions-1);
131 buf_size -= 3*(s->num_coeff_partitions-1);
135 for (i = 0; i < s->num_coeff_partitions-1; i++) {
136 int size = AV_RL24(sizes + 3*i);
137 if (buf_size - size < 0)
140 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
144 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
149 static void get_quants(VP8Context *s)
151 VP56RangeCoder *c = &s->c;
154 int yac_qi = vp8_rac_get_uint(c, 7);
155 int ydc_delta = vp8_rac_get_sint(c, 4);
156 int y2dc_delta = vp8_rac_get_sint(c, 4);
157 int y2ac_delta = vp8_rac_get_sint(c, 4);
158 int uvdc_delta = vp8_rac_get_sint(c, 4);
159 int uvac_delta = vp8_rac_get_sint(c, 4);
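    /* Build one set of dequant factors per segment: the segment's quantizer
     * index (absolute, or relative to yac_qi) is offset by the per-plane
     * deltas read above, clipped to [0,127] and looked up in the DC/AC
     * tables. The Y2 (WHT DC) factors are additionally scaled, with a lower
     * bound of 8 on the AC factor, and chroma DC is capped at 132. */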
161 for (i = 0; i < 4; i++) {
162 if (s->segmentation.enabled) {
163 base_qi = s->segmentation.base_quant[i];
164 if (!s->segmentation.absolute_vals)
169 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
170 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)];
171 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
172 s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100;
173 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
174 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
176 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
177 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
182 * Determine which buffers golden and altref should be updated with after this frame.
183 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
185 * Intra frames update all 3 references
186 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
187 * If the update (golden|altref) flag is set, it's updated with the current frame
188 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
189 * If the flag is not set, the number read means:
190 * 0: no update
191 * 1: VP56_FRAME_PREVIOUS

192 * 2: update golden with altref, or update altref with golden
194 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
196 VP56RangeCoder *c = &s->c;
199 return VP56_FRAME_CURRENT;
201 switch (vp8_rac_get_uint(c, 2)) {
203 return VP56_FRAME_PREVIOUS;
205 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
207 return VP56_FRAME_NONE;
210 static void update_refs(VP8Context *s)
212 VP56RangeCoder *c = &s->c;
214 int update_golden = vp8_rac_get(c);
215 int update_altref = vp8_rac_get(c);
217 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
218 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
221 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
223 VP56RangeCoder *c = &s->c;
224 int header_size, hscale, vscale, i, j, k, l, m, ret;
225 int width = s->avctx->width;
226 int height = s->avctx->height;
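    /* 3-byte frame tag: bit 0 = inverse keyframe flag, bits 1-3 = profile,
     * bit 4 = show_frame, bits 5-23 = size of the first partition. */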
228 s->keyframe = !(buf[0] & 1);
229 s->profile = (buf[0]>>1) & 7;
230 s->invisible = !(buf[0] & 0x10);
231 header_size = AV_RL24(buf) >> 5;
236 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
239 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
240 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
241 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
243 if (header_size > buf_size - 7*s->keyframe) {
244 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
245 return AVERROR_INVALIDDATA;
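    /* Keyframes carry a 7-byte descriptor after the frame tag: a 3-byte start
     * code, then 14-bit width and height, each with a 2-bit upscaling factor
     * in the top bits. */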
249 if (AV_RL24(buf) != 0x2a019d) {
250 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
251 return AVERROR_INVALIDDATA;
253 width = AV_RL16(buf+3) & 0x3fff;
254 height = AV_RL16(buf+5) & 0x3fff;
255 hscale = buf[4] >> 6;
256 vscale = buf[6] >> 6;
260 if (hscale || vscale)
261 av_log_missing_feature(s->avctx, "Upscaling", 1);
263 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
264 for (i = 0; i < 4; i++)
265 for (j = 0; j < 16; j++)
266 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
267 sizeof(s->prob->token[i][j]));
268 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
269 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
270 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
271 memset(&s->segmentation, 0, sizeof(s->segmentation));
274 if (!s->macroblocks_base || /* first frame */
275 width != s->avctx->width || height != s->avctx->height) {
276 if ((ret = update_dimensions(s, width, height)) < 0)
280 ff_vp56_init_range_decoder(c, buf, header_size);
282 buf_size -= header_size;
286 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
287 vp8_rac_get(c); // whether we can skip clamping in dsp functions
290 if ((s->segmentation.enabled = vp8_rac_get(c)))
291 parse_segment_info(s);
293 s->segmentation.update_map = 0; // FIXME: move this to some init function?
295 s->filter.simple = vp8_rac_get(c);
296 s->filter.level = vp8_rac_get_uint(c, 6);
297 s->filter.sharpness = vp8_rac_get_uint(c, 3);
299 if ((s->lf_delta.enabled = vp8_rac_get(c)))
303 if (setup_partitions(s, buf, buf_size)) {
304 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
305 return AVERROR_INVALIDDATA;
312 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
313 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
316 // if we aren't saving this frame's probabilities for future frames,
317 // make a copy of the current probabilities
318 if (!(s->update_probabilities = vp8_rac_get(c)))
319 s->prob[1] = s->prob[0];
321 s->update_last = s->keyframe || vp8_rac_get(c);
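    // token probability updates: each probability is replaced only if its
    // update flag (coded with vp8_token_update_probs) is set; probabilities
    // are stored per coefficient position, so one update is fanned out to
    // every position in the coded band via vp8_coeff_band_indexes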
323 for (i = 0; i < 4; i++)
324 for (j = 0; j < 8; j++)
325 for (k = 0; k < 3; k++)
326 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
327 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
328 int prob = vp8_rac_get_uint(c, 8);
329 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
330 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
333 if ((s->mbskip_enabled = vp8_rac_get(c)))
334 s->prob->mbskip = vp8_rac_get_uint(c, 8);
337 s->prob->intra = vp8_rac_get_uint(c, 8);
338 s->prob->last = vp8_rac_get_uint(c, 8);
339 s->prob->golden = vp8_rac_get_uint(c, 8);
342 for (i = 0; i < 4; i++)
343 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
345 for (i = 0; i < 3; i++)
346 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
348 // 17.2 MV probability update
349 for (i = 0; i < 2; i++)
350 for (j = 0; j < 19; j++)
351 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
352 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
358 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
360 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
361 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
365 * Motion vector coding, 17.1.
367 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
371 if (vp56_rac_get_prob_branchy(c, p[0])) {
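        // "large" magnitude (>= 8): coded as 10 raw bits, read in the order
        // 0-2, then 9 down to 4; bit 3 comes last and is forced to 1 when
        // bits 4-9 are all zero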
374 for (i = 0; i < 3; i++)
375 x += vp56_rac_get_prob(c, p[9 + i]) << i;
376 for (i = 9; i > 3; i--)
377 x += vp56_rac_get_prob(c, p[9 + i]) << i;
378 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
382 const uint8_t *ps = p+2;
383 bit = vp56_rac_get_prob(c, *ps);
386 bit = vp56_rac_get_prob(c, *ps);
389 x += vp56_rac_get_prob(c, *ps);
392 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
395 static av_always_inline
396 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
399 return vp8_submv_prob[4-!!left];
401 return vp8_submv_prob[2];
402 return vp8_submv_prob[1-!!left];
406 * Split motion vector prediction, 16.4.
407 * @returns the number of motion vectors parsed (2, 4 or 16)
409 static av_always_inline
410 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
414 VP8Macroblock *top_mb = &mb[2];
415 VP8Macroblock *left_mb = &mb[-1];
416 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
417 *mbsplits_top = vp8_mbsplits[top_mb->partitioning],
418 *mbsplits_cur, *firstidx;
419 VP56mv *top_mv = top_mb->bmv;
420 VP56mv *left_mv = left_mb->bmv;
421 VP56mv *cur_mv = mb->bmv;
423 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
424 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
425 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
427 part_idx = VP8_SPLITMVMODE_8x8;
430 part_idx = VP8_SPLITMVMODE_4x4;
433 num = vp8_mbsplit_count[part_idx];
434 mbsplits_cur = vp8_mbsplits[part_idx];
435 firstidx = vp8_mbfirstidx[part_idx];
436 mb->partitioning = part_idx;
438 for (n = 0; n < num; n++) {
440 uint32_t left, above;
441 const uint8_t *submv_prob;
444 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
446 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
448 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
450 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
452 submv_prob = get_submv_prob(left, above);
454 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
455 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
456 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
457 mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
458 mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
460 AV_ZERO32(&mb->bmv[n]);
463 AV_WN32A(&mb->bmv[n], above);
466 AV_WN32A(&mb->bmv[n], left);
473 static av_always_inline
474 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
476 VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
478 mb + 1 /* top-left */ };
479 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
480 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
482 int cur_sign_bias = s->sign_bias[mb->ref_frame];
483 int8_t *sign_bias = s->sign_bias;
485 uint8_t cnt[4] = { 0 };
486 VP56RangeCoder *c = &s->c;
488 AV_ZERO32(&near_mv[0]);
489 AV_ZERO32(&near_mv[1]);
491 /* Process MB on top, left and top-left */
492 #define MV_EDGE_CHECK(n)\
494 VP8Macroblock *edge = mb_edge[n];\
495 int edge_ref = edge->ref_frame;\
496 if (edge_ref != VP56_FRAME_CURRENT) {\
497 uint32_t mv = AV_RN32A(&edge->mv);\
499 if (cur_sign_bias != sign_bias[edge_ref]) {\
500 /* SWAR negate of the values in mv. */\
501 mv = ~mv;\
502 mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
504 if (!n || mv != AV_RN32A(&near_mv[idx]))\
505 AV_WN32A(&near_mv[++idx], mv);\
506 cnt[idx] += 1 + (n != 2);\
508 cnt[CNT_ZERO] += 1 + (n != 2);\
516 mb->partitioning = VP8_SPLITMVMODE_NONE;
517 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
518 mb->mode = VP8_MVMODE_MV;
520 /* If we have three distinct MVs, merge first and last if they're the same */
521 if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
522 cnt[CNT_NEAREST] += 1;
524 /* Swap near and nearest if necessary */
525 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
526 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
527 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
530 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
531 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
533 /* Choose the best mv out of 0,0 and the nearest mv */
534 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
535 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
536 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
537 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
539 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
540 mb->mode = VP8_MVMODE_SPLIT;
541 mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
543 mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
544 mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
548 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
552 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
556 mb->mode = VP8_MVMODE_ZERO;
562 static av_always_inline
563 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
564 int mb_x, int keyframe)
566 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
569 uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x;
570 uint8_t* const left = s->intra4x4_pred_mode_left;
571 for (y = 0; y < 4; y++) {
572 for (x = 0; x < 4; x++) {
574 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
575 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
576 left[y] = top[x] = *intra4x4;
582 for (i = 0; i < 16; i++)
583 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
587 static av_always_inline
588 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment, uint8_t *ref)
590 VP56RangeCoder *c = &s->c;
592 if (s->segmentation.update_map)
593 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
595 *segment = ref ? *ref : *segment;
596 s->segment = *segment;
598 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
601 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
603 if (mb->mode == MODE_I4x4) {
604 decode_intra4x4_modes(s, c, mb_x, 1);
606 const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
607 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
608 AV_WN32A(s->intra4x4_pred_mode_left, modes);
611 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
612 mb->ref_frame = VP56_FRAME_CURRENT;
613 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
615 if (vp56_rac_get_prob_branchy(c, s->prob->last))
616 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
617 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
619 mb->ref_frame = VP56_FRAME_PREVIOUS;
620 s->ref_count[mb->ref_frame-1]++;
622 // motion vectors, 16.3
623 decode_mvs(s, mb, mb_x, mb_y);
626 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
628 if (mb->mode == MODE_I4x4)
629 decode_intra4x4_modes(s, c, mb_x, 0);
631 s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
632 mb->ref_frame = VP56_FRAME_CURRENT;
633 mb->partitioning = VP8_SPLITMVMODE_NONE;
634 AV_ZERO32(&mb->bmv[0]);
638 #ifndef decode_block_coeffs_internal
640 * @param c arithmetic bitstream reader context
641 * @param block destination for block coefficients
642 * @param probs probabilities to use when reading trees from the bitstream
643 * @param i initial coeff index, 0 unless a separate DC block is coded
644 * @param zero_nhood the initial prediction context for number of surrounding
645 * all-zero blocks (only left/top, so 0-2)
646 * @param qmul array holding the dc/ac dequant factor at position 0/1
647 * @return 0 if no coeffs were decoded
648 * otherwise, the index of the last coeff decoded plus one
650 static int decode_block_coeffs_internal(VP56RangeCoder *c, DCTELEM block[16],
651 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
652 int i, uint8_t *token_prob, int16_t qmul[2])
657 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
661 if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
663 return i; // invalid input; blocks should end with EOB
664 token_prob = probs[i][0];
668 if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
670 token_prob = probs[i+1][1];
672 if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
673 coeff = vp56_rac_get_prob_branchy(c, token_prob[4]);
675 coeff += vp56_rac_get_prob(c, token_prob[5]);
679 if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
680 if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
681 coeff = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
684 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
685 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
687 } else { // DCT_CAT3 and up
688 int a = vp56_rac_get_prob(c, token_prob[8]);
689 int b = vp56_rac_get_prob(c, token_prob[9+a]);
690 int cat = (a<<1) + b;
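                // DCT_CAT3..CAT6 values start at 11, 19, 35 and 67,
                // i.e. 3 + (8 << cat); the remaining low bits are read
                // with the per-category probabilities below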
691 coeff = 3 + (8<<cat);
692 coeff += vp8_rac_get_coeff(c, ff_vp8_dct_cat_prob[cat]);
695 token_prob = probs[i+1][2];
697 block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
704 static av_always_inline
705 int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
706 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
707 int i, int zero_nhood, int16_t qmul[2])
709 uint8_t *token_prob = probs[i][zero_nhood];
710 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
712 return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
715 static av_always_inline
716 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
717 uint8_t t_nnz[9], uint8_t l_nnz[9])
719 int i, x, y, luma_start = 0, luma_ctx = 3;
720 int nnz_pred, nnz, nnz_total = 0;
721 int segment = s->segment;
724 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
725 nnz_pred = t_nnz[8] + l_nnz[8];
727 // decode DC values and do hadamard
728 nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
729 s->qmat[segment].luma_dc_qmul);
730 l_nnz[8] = t_nnz[8] = !!nnz;
735 s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
737 s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
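        // with a separate DC block coded, the per-block luma decode below
        // skips coefficient 0 and uses token plane 0 instead of 3
        // (luma_start / luma_ctx)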
744 for (y = 0; y < 4; y++)
745 for (x = 0; x < 4; x++) {
746 nnz_pred = l_nnz[y] + t_nnz[x];
747 nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
748 nnz_pred, s->qmat[segment].luma_qmul);
749 // nnz+block_dc may be one more than the actual last index, but we don't care
750 s->non_zero_count_cache[y][x] = nnz + block_dc;
751 t_nnz[x] = l_nnz[y] = !!nnz;
756 // TODO: what to do about dimensions? 2nd dim for luma is x,
757 // but for chroma it's (y<<1)|x
758 for (i = 4; i < 6; i++)
759 for (y = 0; y < 2; y++)
760 for (x = 0; x < 2; x++) {
761 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
762 nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
763 nnz_pred, s->qmat[segment].chroma_qmul);
764 s->non_zero_count_cache[i][(y<<1)+x] = nnz;
765 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
769 // if there were no coded coeffs despite the macroblock not being marked skip,
770 // we MUST not do the inner loop filter and should not do IDCT
771 // Since skip isn't used for bitstream prediction, just manually set it.
776 static av_always_inline
777 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
778 int linesize, int uvlinesize, int simple)
780 AV_COPY128(top_border, src_y + 15*linesize);
782 AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
783 AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
787 static av_always_inline
788 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
789 int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
790 int simple, int xchg)
792 uint8_t *top_border_m1 = top_border-32; // for TL prediction
794 src_cb -= uvlinesize;
795 src_cr -= uvlinesize;
797 #define XCHG(a,b,xchg) do { \
798 if (xchg) AV_SWAP64(b,a); \
799 else AV_COPY64(b,a); \
802 XCHG(top_border_m1+8, src_y-8, xchg);
803 XCHG(top_border, src_y, xchg);
804 XCHG(top_border+8, src_y+8, 1);
805 if (mb_x < mb_width-1)
806 XCHG(top_border+32, src_y+16, 1);
808 // only copy chroma for normal loop filter
809 // or to initialize the top row to 127
810 if (!simple || !mb_y) {
811 XCHG(top_border_m1+16, src_cb-8, xchg);
812 XCHG(top_border_m1+24, src_cr-8, xchg);
813 XCHG(top_border+16, src_cb, 1);
814 XCHG(top_border+24, src_cr, 1);
818 static av_always_inline
819 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
822 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
824 return mb_y ? mode : LEFT_DC_PRED8x8;
828 static av_always_inline
829 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
832 return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
834 return mb_y ? mode : HOR_PRED8x8;
838 static av_always_inline
839 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
841 if (mode == DC_PRED8x8) {
842 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
848 static av_always_inline
849 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
853 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
855 return !mb_y ? DC_127_PRED8x8 : mode;
857 return !mb_x ? DC_129_PRED8x8 : mode;
858 case PLANE_PRED8x8 /*TM*/:
859 return check_tm_pred8x8_mode(mode, mb_x, mb_y);
864 static av_always_inline
865 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
868 return mb_y ? VERT_VP8_PRED : DC_129_PRED;
870 return mb_y ? mode : HOR_VP8_PRED;
874 static av_always_inline
875 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
884 case DIAG_DOWN_LEFT_PRED:
886 return !mb_y ? DC_127_PRED : mode;
894 return !mb_x ? DC_129_PRED : mode;
896 return check_tm_pred4x4_mode(mode, mb_x, mb_y);
897 case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
898 case DIAG_DOWN_RIGHT_PRED:
899 case VERT_RIGHT_PRED:
908 static av_always_inline
909 void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
912 AVCodecContext *avctx = s->avctx;
913 int x, y, mode, nnz, tr;
915 // for the first row, we need to run xchg_mb_border to init the top edge to 127
916 // otherwise, skip it if we aren't going to deblock
917 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
918 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
919 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
920 s->filter.simple, 1);
922 if (mb->mode < MODE_I4x4) {
923 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
924 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
926 mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
928 s->hpc.pred16x16[mode](dst[0], s->linesize);
930 uint8_t *ptr = dst[0];
931 uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
932 uint8_t tr_top[4] = { 127, 127, 127, 127 };
934 // all blocks on the right edge of the macroblock use the bottom edge of
935 // the top macroblock for their topright edge
936 uint8_t *tr_right = ptr - s->linesize + 16;
938 // if we're on the right edge of the frame, said edge is extended
939 // from the top macroblock
940 if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
941 mb_x == s->mb_width-1) {
942 tr = tr_right[-1]*0x01010101;
943 tr_right = (uint8_t *)&tr;
947 AV_ZERO128(s->non_zero_count_cache);
949 for (y = 0; y < 4; y++) {
950 uint8_t *topright = ptr + 4 - s->linesize;
951 for (x = 0; x < 4; x++) {
952 int copy = 0, linesize = s->linesize;
953 uint8_t *dst = ptr+4*x;
954 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
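                // when the block needs pixels from outside the frame, predict into
                // this small staging buffer instead: its top/left border is filled
                // below (from neighbours, or with 127/129) and the 4x4 result is
                // copied back to the destination afterwards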
956 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
961 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
962 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
968 AV_WN32A(copy_dst+4, 127U * 0x01010101U);
970 AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
974 copy_dst[3] = ptr[4*x-s->linesize-1];
983 copy_dst[11] = ptr[4*x -1];
984 copy_dst[19] = ptr[4*x+s->linesize -1];
985 copy_dst[27] = ptr[4*x+s->linesize*2-1];
986 copy_dst[35] = ptr[4*x+s->linesize*3-1];
992 s->hpc.pred4x4[mode](dst, topright, linesize);
994 AV_COPY32(ptr+4*x , copy_dst+12);
995 AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
996 AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
997 AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
1000 nnz = s->non_zero_count_cache[y][x];
1003 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
1005 s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
1010 ptr += 4*s->linesize;
1015 if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1016 mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode, mb_x, mb_y);
1018 mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x, mb_y);
1020 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1021 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1023 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
1024 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1025 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1026 s->filter.simple, 0);
1029 static const uint8_t subpel_idx[3][8] = {
1030 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1031 // also function pointer index
1032 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1033 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
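    // e.g. a fractional position of 2 uses the full six-tap filter and needs
    // 2 extra pixels to the left and 3 to the right of the block; odd
    // positions use the 4-tap variant and need only 1+2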
1037 * Generic MC function.
1039 * @param s VP8 decoding context
1040 * @param luma 1 for luma (Y) planes, 0 for chroma (Cb/Cr) planes
1041 * @param dst target buffer for block data at block position
1042 * @param src reference picture buffer at origin (0, 0)
1043 * @param mv motion vector (relative to block position) to get pixel data from
1044 * @param x_off horizontal position of block from origin (0, 0)
1045 * @param y_off vertical position of block from origin (0, 0)
1046 * @param block_w width of block (16, 8 or 4)
1047 * @param block_h height of block (always same as block_w)
1048 * @param width width of src/dst plane data
1049 * @param height height of src/dst plane data
1050 * @param linesize size of a single line of plane data, including padding
1051 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1053 static av_always_inline
1054 void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv,
1055 int x_off, int y_off, int block_w, int block_h,
1056 int width, int height, int linesize,
1057 vp8_mc_func mc_func[3][3])
1059 uint8_t *src = ref->data[0];
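    // luma MVs are in quarter-pel units: (mv << 1) & 7 maps the fractional part
    // onto the 1/8-pel index used by the mc function table (only even positions
    // occur for luma), and mv >> 2 is the integer offset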
1063 int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1064 int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
1066 x_off += mv->x >> 2;
1067 y_off += mv->y >> 2;
1070 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1071 src += y_off * linesize + x_off;
1072 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1073 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1074 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
1075 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1076 x_off - mx_idx, y_off - my_idx, width, height);
1077 src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1079 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
1081 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1082 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
1086 static av_always_inline
1087 void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref,
1088 const VP56mv *mv, int x_off, int y_off,
1089 int block_w, int block_h, int width, int height, int linesize,
1090 vp8_mc_func mc_func[3][3])
1092 uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
1095 int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1096 int my = mv->y&7, my_idx = subpel_idx[0][my];
1098 x_off += mv->x >> 3;
1099 y_off += mv->y >> 3;
1102 src1 += y_off * linesize + x_off;
1103 src2 += y_off * linesize + x_off;
1104 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1105 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1106 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1107 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
1108 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1109 x_off - mx_idx, y_off - my_idx, width, height);
1110 src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1111 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1113 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
1114 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1115 x_off - mx_idx, y_off - my_idx, width, height);
1116 src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1117 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1119 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1120 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1123 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1124 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1125 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1129 static av_always_inline
1130 void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
1131 AVFrame *ref_frame, int x_off, int y_off,
1132 int bx_off, int by_off,
1133 int block_w, int block_h,
1134 int width, int height, VP56mv *mv)
1139 vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
1140 ref_frame, mv, x_off + bx_off, y_off + by_off,
1141 block_w, block_h, width, height, s->linesize,
1142 s->put_pixels_tab[block_w == 8]);
1145 if (s->profile == 3) {
1149 x_off >>= 1; y_off >>= 1;
1150 bx_off >>= 1; by_off >>= 1;
1151 width >>= 1; height >>= 1;
1152 block_w >>= 1; block_h >>= 1;
1153 vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
1154 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1155 &uvmv, x_off + bx_off, y_off + by_off,
1156 block_w, block_h, width, height, s->uvlinesize,
1157 s->put_pixels_tab[1 + (block_w == 4)]);
1160 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1161 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1162 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1164 /* Don't prefetch refs that haven't been used very often this frame. */
1165 if (s->ref_count[ref-1] > (mb_xy >> 5)) {
1166 int x_off = mb_x << 4, y_off = mb_y << 4;
1167 int mx = (mb->mv.x>>2) + x_off + 8;
1168 int my = (mb->mv.y>>2) + y_off;
1169 uint8_t **src= s->framep[ref]->data;
1170 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1171 /* For threading, a ff_thread_await_progress here might be useful, but
1172 * it actually slows down the decoder. Since a bad prefetch doesn't
1173 * generate bad decoder output, we don't run it here. */
1174 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1175 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1176 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1181 * Apply motion vectors to prediction buffer, chapter 18.
1183 static av_always_inline
1184 void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
1187 int x_off = mb_x << 4, y_off = mb_y << 4;
1188 int width = 16*s->mb_width, height = 16*s->mb_height;
1189 AVFrame *ref = s->framep[mb->ref_frame];
1190 VP56mv *bmv = mb->bmv;
1192 switch (mb->partitioning) {
1193 case VP8_SPLITMVMODE_NONE:
1194 vp8_mc_part(s, dst, ref, x_off, y_off,
1195 0, 0, 16, 16, width, height, &mb->mv);
1197 case VP8_SPLITMVMODE_4x4: {
1202 for (y = 0; y < 4; y++) {
1203 for (x = 0; x < 4; x++) {
1204 vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
1206 4*x + x_off, 4*y + y_off, 4, 4,
1207 width, height, s->linesize,
1208 s->put_pixels_tab[2]);
1213 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1214 for (y = 0; y < 2; y++) {
1215 for (x = 0; x < 2; x++) {
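                // chroma MV for this 4x4 chroma block = average of the four luma
                // sub-MVs covering the corresponding 8x8 luma area, rounded to
                // nearest with halfway cases away from zero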
1216 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1217 mb->bmv[ 2*y * 4 + 2*x+1].x +
1218 mb->bmv[(2*y+1) * 4 + 2*x ].x +
1219 mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1220 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1221 mb->bmv[ 2*y * 4 + 2*x+1].y +
1222 mb->bmv[(2*y+1) * 4 + 2*x ].y +
1223 mb->bmv[(2*y+1) * 4 + 2*x+1].y;
1224 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1225 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
1226 if (s->profile == 3) {
1230 vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
1231 dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1232 4*x + x_off, 4*y + y_off, 4, 4,
1233 width, height, s->uvlinesize,
1234 s->put_pixels_tab[2]);
1239 case VP8_SPLITMVMODE_16x8:
1240 vp8_mc_part(s, dst, ref, x_off, y_off,
1241 0, 0, 16, 8, width, height, &bmv[0]);
1242 vp8_mc_part(s, dst, ref, x_off, y_off,
1243 0, 8, 16, 8, width, height, &bmv[1]);
1245 case VP8_SPLITMVMODE_8x16:
1246 vp8_mc_part(s, dst, ref, x_off, y_off,
1247 0, 0, 8, 16, width, height, &bmv[0]);
1248 vp8_mc_part(s, dst, ref, x_off, y_off,
1249 8, 0, 8, 16, width, height, &bmv[1]);
1251 case VP8_SPLITMVMODE_8x8:
1252 vp8_mc_part(s, dst, ref, x_off, y_off,
1253 0, 0, 8, 8, width, height, &bmv[0]);
1254 vp8_mc_part(s, dst, ref, x_off, y_off,
1255 8, 0, 8, 8, width, height, &bmv[1]);
1256 vp8_mc_part(s, dst, ref, x_off, y_off,
1257 0, 8, 8, 8, width, height, &bmv[2]);
1258 vp8_mc_part(s, dst, ref, x_off, y_off,
1259 8, 8, 8, 8, width, height, &bmv[3]);
1264 static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
1268 if (mb->mode != MODE_I4x4) {
1269 uint8_t *y_dst = dst[0];
1270 for (y = 0; y < 4; y++) {
1271 uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
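            // nnz4 packs the coefficient counts of the four 4x4 blocks in this
            // row; if every block has at most a DC coefficient, use the combined
            // DC-only add, otherwise transform each block separately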
1273 if (nnz4&~0x01010101) {
1274 for (x = 0; x < 4; x++) {
1275 if ((uint8_t)nnz4 == 1)
1276 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
1277 else if((uint8_t)nnz4 > 1)
1278 s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
1284 s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
1287 y_dst += 4*s->linesize;
1291 for (ch = 0; ch < 2; ch++) {
1292 uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
1294 uint8_t *ch_dst = dst[1+ch];
1295 if (nnz4&~0x01010101) {
1296 for (y = 0; y < 2; y++) {
1297 for (x = 0; x < 2; x++) {
1298 if ((uint8_t)nnz4 == 1)
1299 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
1300 else if((uint8_t)nnz4 > 1)
1301 s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
1304 goto chroma_idct_end;
1306 ch_dst += 4*s->uvlinesize;
1309 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
1316 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
1318 int interior_limit, filter_level;
1320 if (s->segmentation.enabled) {
1321 filter_level = s->segmentation.filter_level[s->segment];
1322 if (!s->segmentation.absolute_vals)
1323 filter_level += s->filter.level;
1325 filter_level = s->filter.level;
1327 if (s->lf_delta.enabled) {
1328 filter_level += s->lf_delta.ref[mb->ref_frame];
1329 filter_level += s->lf_delta.mode[mb->mode];
1332 filter_level = av_clip_uintp2(filter_level, 6);
1334 interior_limit = filter_level;
1335 if (s->filter.sharpness) {
1336 interior_limit >>= (s->filter.sharpness + 3) >> 2;
1337 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1339 interior_limit = FFMAX(interior_limit, 1);
1341 f->filter_level = filter_level;
1342 f->inner_limit = interior_limit;
1343 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
1346 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
1348 int mbedge_lim, bedge_lim, hev_thresh;
1349 int filter_level = f->filter_level;
1350 int inner_limit = f->inner_limit;
1351 int inner_filter = f->inner_filter;
1352 int linesize = s->linesize;
1353 int uvlinesize = s->uvlinesize;
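    // high-edge-variance threshold as a function of filter level, indexed by
    // s->keyframe below: row [0] is used for inter frames, row [1] for keyframes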
1354 static const uint8_t hev_thresh_lut[2][64] = {
1355 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1356 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1357 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1359 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1360 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1361 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1368 bedge_lim = 2*filter_level + inner_limit;
1369 mbedge_lim = bedge_lim + 4;
1371 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
1374 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
1375 mbedge_lim, inner_limit, hev_thresh);
1376 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
1377 mbedge_lim, inner_limit, hev_thresh);
1381 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
1382 inner_limit, hev_thresh);
1383 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
1384 inner_limit, hev_thresh);
1385 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
1386 inner_limit, hev_thresh);
1387 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
1388 uvlinesize, bedge_lim,
1389 inner_limit, hev_thresh);
1393 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
1394 mbedge_lim, inner_limit, hev_thresh);
1395 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
1396 mbedge_lim, inner_limit, hev_thresh);
1400 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
1401 linesize, bedge_lim,
1402 inner_limit, hev_thresh);
1403 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
1404 linesize, bedge_lim,
1405 inner_limit, hev_thresh);
1406 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
1407 linesize, bedge_lim,
1408 inner_limit, hev_thresh);
1409 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
1410 dst[2] + 4 * uvlinesize,
1411 uvlinesize, bedge_lim,
1412 inner_limit, hev_thresh);
1416 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
1418 int mbedge_lim, bedge_lim;
1419 int filter_level = f->filter_level;
1420 int inner_limit = f->inner_limit;
1421 int inner_filter = f->inner_filter;
1422 int linesize = s->linesize;
1427 bedge_lim = 2*filter_level + inner_limit;
1428 mbedge_lim = bedge_lim + 4;
1431 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
1433 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1434 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1435 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
1439 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
1441 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1442 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1443 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
1447 static void filter_mb_row(VP8Context *s, AVFrame *curframe, int mb_y)
1449 VP8FilterStrength *f = s->filter_strength;
1451 curframe->data[0] + 16*mb_y*s->linesize,
1452 curframe->data[1] + 8*mb_y*s->uvlinesize,
1453 curframe->data[2] + 8*mb_y*s->uvlinesize
1457 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
1458 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1459 filter_mb(s, dst, f++, mb_x, mb_y);
1466 static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y)
1468 VP8FilterStrength *f = s->filter_strength;
1469 uint8_t *dst = curframe->data[0] + 16*mb_y*s->linesize;
1472 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
1473 backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
1474 filter_mb_simple(s, dst, f++, mb_x, mb_y);
1479 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
1482 VP8Context *s = avctx->priv_data;
1483 int ret, mb_x, mb_y, i, y, referenced;
1484 enum AVDiscard skip_thresh;
1485 AVFrame *av_uninit(curframe), *prev_frame = s->framep[VP56_FRAME_CURRENT];
1487 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
1490 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
1491 || s->update_altref == VP56_FRAME_CURRENT;
1493 skip_thresh = !referenced ? AVDISCARD_NONREF :
1494 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
1496 if (avctx->skip_frame >= skip_thresh) {
1500 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
1502 // release no longer referenced frames
1503 for (i = 0; i < 5; i++)
1504 if (s->frames[i].data[0] &&
1505 &s->frames[i] != prev_frame &&
1506 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1507 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1508 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1509 ff_thread_release_buffer(avctx, &s->frames[i]);
1511 // find a free buffer
1512 for (i = 0; i < 5; i++)
1513 if (&s->frames[i] != prev_frame &&
1514 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1515 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1516 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1517 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
1521 av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
1524 if (curframe->data[0])
1525 ff_thread_release_buffer(avctx, curframe);
1527 curframe->key_frame = s->keyframe;
1528 curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1529 curframe->reference = referenced ? 3 : 0;
1530 curframe->ref_index[0] = s->segmentation_map;
1531 if ((ret = ff_thread_get_buffer(avctx, curframe))) {
1532 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
1536 // check if golden and altref are swapped
1537 if (s->update_altref != VP56_FRAME_NONE) {
1538 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
1540 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
1542 if (s->update_golden != VP56_FRAME_NONE) {
1543 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
1545 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
1547 if (s->update_last) {
1548 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
1550 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
1552 s->next_framep[VP56_FRAME_CURRENT] = curframe;
1554 ff_thread_finish_setup(avctx);
1556 // Given that arithmetic probabilities are updated every frame, it's quite likely
1557 // that the values we have on a random interframe are complete junk if we didn't
1558 // start decode on a keyframe. So just don't display anything rather than junk.
1559 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1560 !s->framep[VP56_FRAME_GOLDEN] ||
1561 !s->framep[VP56_FRAME_GOLDEN2])) {
1562 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1563 return AVERROR_INVALIDDATA;
1566 s->linesize = curframe->linesize[0];
1567 s->uvlinesize = curframe->linesize[1];
1569 if (!s->edge_emu_buffer)
1570 s->edge_emu_buffer = av_malloc(21*s->linesize);
1572 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1574 /* Zero macroblock structures for top/top-left prediction from outside the frame. */
1575 memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
1577 // top edge of 127 for intra prediction
1578 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1579 s->top_border[0][15] = s->top_border[0][23] = 127;
1580 memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
1582 memset(s->ref_count, 0, sizeof(s->ref_count));
1584 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
1586 #define MARGIN (16 << 2)
1587 s->mv_min.y = -MARGIN;
1588 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1590 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
1591 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
1592 VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1593 int mb_xy = mb_y*s->mb_width;
1595 curframe->data[0] + 16*mb_y*s->linesize,
1596 curframe->data[1] + 8*mb_y*s->uvlinesize,
1597 curframe->data[2] + 8*mb_y*s->uvlinesize
1600 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
1601 memset(s->left_nnz, 0, sizeof(s->left_nnz));
1602 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1604 // left edge of 129 for intra prediction
1605 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1606 for (i = 0; i < 3; i++)
1607 for (y = 0; y < 16>>!!i; y++)
1608 dst[i][y*curframe->linesize[i]-1] = 129;
1609 if (mb_y == 1) // top left edge is also 129
1610 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
1613 s->mv_min.x = -MARGIN;
1614 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1615 if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
1616 ff_thread_await_progress(prev_frame, mb_y, 0);
1618 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1619 /* Prefetch the current frame, 4 MBs ahead */
1620 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1621 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1623 decode_mb_mode(s, mb, mb_x, mb_y, s->segmentation_map + mb_xy,
1624 prev_frame ? prev_frame->ref_index[0] + mb_xy : NULL);
1626 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
1629 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
1631 if (mb->mode <= MODE_I4x4)
1632 intra_predict(s, dst, mb, mb_x, mb_y);
1634 inter_predict(s, dst, mb, mb_x, mb_y);
1636 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
1639 idct_mb(s, dst, mb);
1641 AV_ZERO64(s->left_nnz);
1642 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
1644 // Reset DC block predictors if they would exist if the mb had coefficients
1645 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
1647 s->top_nnz[mb_x][8] = 0;
1651 if (s->deblock_filter)
1652 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);
1654 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
1662 if (s->deblock_filter) {
1663 if (s->filter.simple)
1664 filter_mb_row_simple(s, curframe, mb_y);
1666 filter_mb_row(s, curframe, mb_y);
1671 ff_thread_report_progress(curframe, mb_y, 0);
1674 ff_thread_report_progress(curframe, INT_MAX, 0);
1676 // if future frames don't use the updated probabilities,
1677 // reset them to the values we saved
1678 if (!s->update_probabilities)
1679 s->prob[0] = s->prob[1];
1681 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
1683 if (!s->invisible) {
1684 *(AVFrame*)data = *curframe;
1685 *data_size = sizeof(AVFrame);
1691 static av_cold int vp8_decode_init(AVCodecContext *avctx)
1693 VP8Context *s = avctx->priv_data;
1696 avctx->pix_fmt = PIX_FMT_YUV420P;
1698 dsputil_init(&s->dsp, avctx);
1699 ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8);
1700 ff_vp8dsp_init(&s->vp8dsp);
1705 static av_cold int vp8_decode_free(AVCodecContext *avctx)
1707 vp8_decode_flush(avctx);
1711 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
1713 VP8Context *s = avctx->priv_data;
1720 #define REBASE(pic) \
1721 pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
1723 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
1725 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
1727 s->prob[0] = s_src->prob[!s_src->update_probabilities];
1728 s->segmentation = s_src->segmentation;
1729 s->lf_delta = s_src->lf_delta;
1730 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
1732 memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
1733 s->framep[0] = REBASE(s_src->next_framep[0]);
1734 s->framep[1] = REBASE(s_src->next_framep[1]);
1735 s->framep[2] = REBASE(s_src->next_framep[2]);
1736 s->framep[3] = REBASE(s_src->next_framep[3]);
1741 AVCodec ff_vp8_decoder = {
1750 CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
1751 .flush = vp8_decode_flush,
1752 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
1753 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
1754 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),