2 * VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Jason Garrett-Glaser
8 * This file is part of Libav.
10 * Libav is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * Libav is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with Libav; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "libavutil/imgutils.h"
29 #include "rectangle.h"
/**
 * Release all reference frames and free per-row scratch buffers.
 * NOTE(review): this excerpt is missing lines (braces, declaration of the
 * loop index `i`); the visible statements are kept verbatim.
 */
36 static void vp8_decode_flush(AVCodecContext *avctx)
38     VP8Context *s = avctx->priv_data;
    /* Frame buffers are owned by the original context, not by thread copies. */
41     if (!avctx->is_copy) {
42         for (i = 0; i < 5; i++)
43             if (s->frames[i].data[0])
44                 ff_thread_release_buffer(avctx, &s->frames[i]);
    /* Clear the frame-pointer table so no stale references remain. */
46     memset(s->framep, 0, sizeof(s->framep));
    /* Free all per-frame-width scratch arrays; av_freep also NULLs them. */
48     av_freep(&s->macroblocks_base);
49     av_freep(&s->filter_strength);
50     av_freep(&s->intra4x4_pred_mode_top);
51     av_freep(&s->top_nnz);
52     av_freep(&s->edge_emu_buffer);
53     av_freep(&s->top_border);
54     av_freep(&s->segmentation_map);
    /* macroblocks points into macroblocks_base (see update_dimensions). */
56     s->macroblocks = NULL;
/**
 * (Re)allocate per-row decoding buffers for a new frame size.
 * Returns 0 on success, AVERROR_INVALIDDATA on bad dimensions or
 * AVERROR(ENOMEM) on allocation failure.
 * NOTE(review): excerpt is missing lines (braces, return statement).
 */
59 static int update_dimensions(VP8Context *s, int width, int height)
61     if (width != s->avctx->width ||
62         height != s->avctx->height) {
63         if (av_image_check_size(width, height, 0, s->avctx))
64             return AVERROR_INVALIDDATA;
        /* Old buffers are sized for the old dimensions; drop them first. */
66         vp8_decode_flush(s->avctx);
68         avcodec_set_dimensions(s->avctx, width, height);
    /* Macroblock grid: one MB covers 16x16 luma pixels (round up). */
71     s->mb_width  = (s->avctx->coded_width +15) / 16;
72     s->mb_height = (s->avctx->coded_height+15) / 16;
    /* +1 / *2 padding presumably provides top/left neighbor slots — confirm
     * against the full source. */
74     s->macroblocks_base        = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
75     s->filter_strength         = av_mallocz(s->mb_width*sizeof(*s->filter_strength));
76     s->intra4x4_pred_mode_top  = av_mallocz(s->mb_width*4);
77     s->top_nnz                 = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
78     s->top_border              = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
79     s->segmentation_map        = av_mallocz(s->mb_width*s->mb_height);
    /* Fail as a unit if any allocation came back NULL. */
81     if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
82         !s->top_nnz || !s->top_border || !s->segmentation_map)
83         return AVERROR(ENOMEM);
    /* Offset by one so mb[-1] (left neighbor) is always addressable. */
85     s->macroblocks        = s->macroblocks_base + 1;
/**
 * Parse segmentation header fields (quantizer/filter deltas per segment and
 * the segment-id tree probabilities) from the frame-header range coder.
 * NOTE(review): excerpt is missing lines (braces, declaration of `i`).
 */
90 static void parse_segment_info(VP8Context *s)
92     VP56RangeCoder *c = &s->c;
95     s->segmentation.update_map = vp8_rac_get(c);
97     if (vp8_rac_get(c)) { // update segment feature data
        /* absolute_vals: per-segment values replace (vs. offset) the base. */
98         s->segmentation.absolute_vals = vp8_rac_get(c);
        /* Four segments, each with a signed 7-bit quantizer value... */
100         for (i = 0; i < 4; i++)
101             s->segmentation.base_quant[i]  = vp8_rac_get_sint(c, 7);
        /* ...and a signed 6-bit loop-filter level value. */
103         for (i = 0; i < 4; i++)
104             s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    /* Tree probabilities for coding the per-MB segment id; 255 = default. */
106     if (s->segmentation.update_map)
107         for (i = 0; i < 3; i++)
108             s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
/**
 * Read loop-filter delta adjustments: one signed 6-bit delta per reference
 * frame and one per prediction mode (MODE_I4x4 .. VP8_MVMODE_SPLIT).
 * NOTE(review): excerpt is missing lines (braces, declaration of `i`).
 */
111 static void update_lf_deltas(VP8Context *s)
113     VP56RangeCoder *c = &s->c;
116     for (i = 0; i < 4; i++)
117         s->lf_delta.ref[i]  = vp8_rac_get_sint(c, 6);
119     for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++)
120         s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6);
/**
 * Initialize the range decoders for the DCT coefficient partitions.
 * The partition count (1, 2, 4 or 8) is read from the header coder; the
 * first (count-1) partition sizes are stored as 24-bit LE values at the
 * start of `buf`, and the final partition consumes the remaining bytes.
 * NOTE(review): excerpt is missing lines (braces, error return, the
 * buf/buf_size advance inside the loop).
 */
123 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
125     const uint8_t *sizes = buf;
128     s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
    /* Skip past the size table itself. */
130     buf      += 3*(s->num_coeff_partitions-1);
131     buf_size -= 3*(s->num_coeff_partitions-1);
135     for (i = 0; i < s->num_coeff_partitions-1; i++) {
136         int size = AV_RL24(sizes + 3*i);
        /* Reject partitions that claim more data than remains. */
137         if (buf_size - size < 0)
140         ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
    /* Last partition takes whatever is left of the packet. */
144     ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
/**
 * Read the quantizer indices/deltas from the header and fill the dequant
 * multiplier tables for each of the four segments, clamping indices into
 * the 0..127 lookup-table range.
 * NOTE(review): excerpt is missing lines (braces, declarations of `i` and
 * `base_qi`, the non-segmented else-branch and delta application).
 */
149 static void get_quants(VP8Context *s)
151     VP56RangeCoder *c = &s->c;
    /* Base luma AC quantizer index plus signed 4-bit deltas for the other
     * five coefficient classes. */
154     int yac_qi     = vp8_rac_get_uint(c, 7);
155     int ydc_delta  = vp8_rac_get_sint(c, 4);
156     int y2dc_delta = vp8_rac_get_sint(c, 4);
157     int y2ac_delta = vp8_rac_get_sint(c, 4);
158     int uvdc_delta = vp8_rac_get_sint(c, 4);
159     int uvac_delta = vp8_rac_get_sint(c, 4);
161     for (i = 0; i < 4; i++) {
162         if (s->segmentation.enabled) {
163             base_qi = s->segmentation.base_quant[i];
            /* Relative mode: segment value offsets the frame base quantizer
             * (the addition is on a line not visible in this excerpt). */
164             if (!s->segmentation.absolute_vals)
        /* Look up dequant factors; DC and AC use separate tables. */
169         s->qmat[i].luma_qmul[0]    =           vp8_dc_qlookup[av_clip(base_qi + ydc_delta , 0, 127)];
170         s->qmat[i].luma_qmul[1]    =           vp8_ac_qlookup[av_clip(base_qi             , 0, 127)];
171         s->qmat[i].luma_dc_qmul[0] =       2 * vp8_dc_qlookup[av_clip(base_qi + y2dc_delta, 0, 127)];
172         s->qmat[i].luma_dc_qmul[1] =     155 * vp8_ac_qlookup[av_clip(base_qi + y2ac_delta, 0, 127)] / 100;
173         s->qmat[i].chroma_qmul[0]  =           vp8_dc_qlookup[av_clip(base_qi + uvdc_delta, 0, 127)];
174         s->qmat[i].chroma_qmul[1]  =           vp8_ac_qlookup[av_clip(base_qi + uvac_delta, 0, 127)];
        /* Spec-mandated floors/ceilings on two of the factors. */
176         s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
177         s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
182  * Determine which buffers golden and altref should be updated with after this frame.
183  * The spec isn't clear here, so I'm going by my understanding of what libvpx does
185  * Intra frames update all 3 references
186  * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
187  * If the update (golden|altref) flag is set, it's updated with the current frame
188  * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
189  * If the flag is not set, the number read means:
191  *     1: VP56_FRAME_PREVIOUS
192  *     2: update golden with altref, or update altref with golden
/* Map the header's update flag / 2-bit code to the VP56Frame that should be
 * copied into the golden or altref slot after decoding this frame.
 * NOTE(review): excerpt is missing lines (braces, the `if (update)` guard,
 * case labels of the switch). */
194 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
196     VP56RangeCoder *c = &s->c;
199         return VP56_FRAME_CURRENT;
201     switch (vp8_rac_get_uint(c, 2)) {
203         return VP56_FRAME_PREVIOUS;
        /* Cross-copy: golden gets altref, altref gets golden. */
205         return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
207     return VP56_FRAME_NONE;
/**
 * Read the golden/altref update flags and resolve them (via ref_to_update)
 * into the frame slots that will refresh those references.
 */
210 static void update_refs(VP8Context *s)
212     VP56RangeCoder *c = &s->c;
214     int update_golden = vp8_rac_get(c);
215     int update_altref = vp8_rac_get(c);
217     s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
218     s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
/**
 * Parse the VP8 frame header: uncompressed header bytes (frame type,
 * profile, dimensions, start code) followed by the range-coded header
 * (segmentation, loop filter, partitions, quantizers, reference updates
 * and probability updates).
 * @return 0 on success, a negative AVERROR on invalid data.
 * NOTE(review): excerpt is missing many lines (braces, keyframe-only
 * branches, mv_min/mv_max setup, etc.); visible lines kept verbatim.
 */
221 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
223     VP56RangeCoder *c = &s->c;
224     int header_size, hscale, vscale, i, j, k, l, m, ret;
225     int width  = s->avctx->width;
226     int height = s->avctx->height;
    /* First byte: bit0 = inverse keyframe flag, bits1-3 = profile,
     * bit4 = show_frame (inverted into `invisible`). */
228     s->keyframe  = !(buf[0] & 1);
229     s->profile   =  (buf[0]>>1) & 7;
230     s->invisible = !(buf[0] & 0x10);
231     header_size  = AV_RL24(buf) >> 5;
236         av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
    /* Profile 0 uses the 6-tap epel filters... */
239         memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
240     else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
241         memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
    /* Keyframes carry 7 extra uncompressed bytes (start code + size). */
243     if (header_size > buf_size - 7*s->keyframe) {
244         av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
245         return AVERROR_INVALIDDATA;
    /* Keyframe start code, little-endian 0x9d012a. */
249         if (AV_RL24(buf) != 0x2a019d) {
250             av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
251             return AVERROR_INVALIDDATA;
    /* 14-bit width/height with 2-bit scaling codes in the top bits. */
253         width  = AV_RL16(buf+3) & 0x3fff;
254         height = AV_RL16(buf+5) & 0x3fff;
255         hscale = buf[4] >> 6;
256         vscale = buf[6] >> 6;
260         if (hscale || vscale)
261             av_log_missing_feature(s->avctx, "Upscaling", 1);
        /* Keyframe: reset all reference updates and probabilities to the
         * spec defaults. */
263         s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
264         for (i = 0; i < 4; i++)
265             for (j = 0; j < 16; j++)
266                 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
267                        sizeof(s->prob->token[i][j]));
268         memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
269         memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
270         memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
271         memset(&s->segmentation, 0, sizeof(s->segmentation));
    /* Reallocate buffers on the first frame or on a dimension change.
     * NOTE(review): the parenthesization `ret = ... < 0` assigns the
     * comparison result to ret, losing the AVERROR code — looks like a bug
     * in the original; cannot fix here as the enclosing lines are missing. */
274     if (!s->macroblocks_base || /* first frame */
275         width != s->avctx->width || height != s->avctx->height) {
276         if ((ret = update_dimensions(s, width, height) < 0))
    /* Start the range decoder on the compressed header partition. */
280     ff_vp56_init_range_decoder(c, buf, header_size);
282     buf_size -= header_size;
286             av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
287         vp8_rac_get(c); // whether we can skip clamping in dsp functions
290     if ((s->segmentation.enabled = vp8_rac_get(c)))
291         parse_segment_info(s);
293         s->segmentation.update_map = 0; // FIXME: move this to some init function?
295     s->filter.simple    = vp8_rac_get(c);
296     s->filter.level     = vp8_rac_get_uint(c, 6);
297     s->filter.sharpness = vp8_rac_get_uint(c, 3);
299     if ((s->lf_delta.enabled = vp8_rac_get(c)))
303     if (setup_partitions(s, buf, buf_size)) {
304         av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
305         return AVERROR_INVALIDDATA;
    /* Sign-bias flags for golden and altref motion vectors. */
312         s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
313         s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
316     // if we aren't saving this frame's probabilities for future frames,
317     // make a copy of the current probabilities
318     if (!(s->update_probabilities = vp8_rac_get(c)))
319         s->prob[1] = s->prob[0];
321     s->update_last = s->keyframe || vp8_rac_get(c);
    /* Token probability updates: one optional 8-bit value per
     * (plane-type, band, context, token) tuple, fanned out over the
     * coefficient positions belonging to that band. */
323     for (i = 0; i < 4; i++)
324         for (j = 0; j < 8; j++)
325             for (k = 0; k < 3; k++)
326                 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
327                     if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
328                         int prob = vp8_rac_get_uint(c, 8);
329                         for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
330                             s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
333     if ((s->mbskip_enabled = vp8_rac_get(c)))
334         s->prob->mbskip = vp8_rac_get_uint(c, 8);
        /* Inter-frame mode/reference probabilities. */
337         s->prob->intra  = vp8_rac_get_uint(c, 8);
338         s->prob->last   = vp8_rac_get_uint(c, 8);
339         s->prob->golden = vp8_rac_get_uint(c, 8);
342             for (i = 0; i < 4; i++)
343                 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
345             for (i = 0; i < 3; i++)
346                 s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);
348         // 17.2 MV probability update
349         for (i = 0; i < 2; i++)
350             for (j = 0; j < 19; j++)
351                 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
352                     s->prob->mvc[i][j] = vp8_rac_get_nn(c);
/* Clamp a motion vector into the per-macroblock legal range
 * (s->mv_min/mv_max, set up elsewhere per MB position). */
358 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
360     dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
361     dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
365  * Motion vector coding, 17.1.
/* Decode one signed motion-vector component using the probability vector p:
 * either a "long" 10-bit magnitude (bits 0-2 low-to-high, then bits 9-4
 * high-to-low, bit 3 conditionally) or a short tree-coded magnitude,
 * followed by a sign bit.
 * NOTE(review): excerpt is missing lines (braces, declarations of x/i/bit,
 * parts of the short-tree walk). */
367 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
371     if (vp56_rac_get_prob_branchy(c, p[0])) {
        /* Long vector: low 3 bits first... */
374         for (i = 0; i < 3; i++)
375             x += vp56_rac_get_prob(c, p[9 + i]) << i;
        /* ...then bits 9 down to 4... */
376         for (i = 9; i > 3; i--)
377             x += vp56_rac_get_prob(c, p[9 + i]) << i;
        /* ...bit 3 is only coded if some higher bit was set. */
378         if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
        /* Short vector: walk the small probability tree at p+2. */
382         const uint8_t *ps = p+2;
383         bit = vp56_rac_get_prob(c, *ps);
386         bit = vp56_rac_get_prob(c, *ps);
389         x += vp56_rac_get_prob(c, *ps);
    /* Sign bit only when magnitude is nonzero. */
392     return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
/* Select the sub-MV probability set from whether the left/top neighbor
 * sub-block MVs are zero / equal (comparisons are on lines not visible in
 * this excerpt). */
395 static av_always_inline
396 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
399             return vp8_submv_prob[4-!!left];
401             return vp8_submv_prob[2];
402     return vp8_submv_prob[1-!!left];
406  * Split motion vector prediction, 16.4.
407  * @returns the number of motion vectors parsed (2, 4 or 16)
409 static av_always_inline
410 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
    /* Neighbor macroblocks: mb[2] is the MB above, mb[-1] the one to the
     * left (see the macroblocks_base layout in update_dimensions). */
414     VP8Macroblock *top_mb  = &mb[2];
415     VP8Macroblock *left_mb = &mb[-1];
416     const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
417                   *mbsplits_top = vp8_mbsplits[top_mb->partitioning],
418                   *mbsplits_cur, *firstidx;
419     VP56mv *top_mv  = top_mb->bmv;
420     VP56mv *left_mv = left_mb->bmv;
421     VP56mv *cur_mv  = mb->bmv;
    /* Decode the partitioning mode from its small probability tree. */
423     if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
424         if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
425             part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
427             part_idx = VP8_SPLITMVMODE_8x8;
430         part_idx = VP8_SPLITMVMODE_4x4;
433     num = vp8_mbsplit_count[part_idx];
434     mbsplits_cur = vp8_mbsplits[part_idx],
435     firstidx = vp8_mbfirstidx[part_idx];
436     mb->partitioning = part_idx;
438     for (n = 0; n < num; n++) {
440         uint32_t left, above;
441         const uint8_t *submv_prob;
        /* Neighbor sub-MVs: from the adjacent MB on the block edge,
         * otherwise from already-decoded sub-blocks of this MB. */
444             left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
446             left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
448             above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
450             above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
452         submv_prob = get_submv_prob(left, above);
        /* Sub-MV mode tree: NEW (delta-coded), ZERO, ABOVE or LEFT. */
454         if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
455             if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
456                 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
457                     mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
458                     mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
460                     AV_ZERO32(&mb->bmv[n]);
463                 AV_WN32A(&mb->bmv[n], above);
466             AV_WN32A(&mb->bmv[n], left);
/**
 * Decode this macroblock's inter prediction mode and motion vector(s),
 * using the top / left / top-left neighbors to build the nearest/near MV
 * candidates and their counts (spec chapter 16).
 * NOTE(review): excerpt is missing lines (braces, `idx`/`near_mv`
 * declarations, the MV_EDGE_CHECK invocations and several else-branches).
 */
473 static av_always_inline
474 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
476     VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
478                                   mb + 1 /* top-left */ };
479     enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
480     enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
482     int cur_sign_bias = s->sign_bias[mb->ref_frame];
483     int8_t *sign_bias = s->sign_bias;
485     uint8_t cnt[4] = { 0 };
486     VP56RangeCoder *c = &s->c;
488     AV_ZERO32(&near_mv[0]);
489     AV_ZERO32(&near_mv[1]);
491     /* Process MB on top, left and top-left */
492     #define MV_EDGE_CHECK(n)\
494         VP8Macroblock *edge = mb_edge[n];\
495         int edge_ref = edge->ref_frame;\
496         if (edge_ref != VP56_FRAME_CURRENT) {\
497             uint32_t mv = AV_RN32A(&edge->mv);\
499                 if (cur_sign_bias != sign_bias[edge_ref]) {\
500                     /* SWAR negate of the values in mv. */\
502                     mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
504                 if (!n || mv != AV_RN32A(&near_mv[idx]))\
505                     AV_WN32A(&near_mv[++idx], mv);\
506                 cnt[idx]      += 1 + (n != 2);\
508                 cnt[CNT_ZERO] += 1 + (n != 2);\
516     mb->partitioning = VP8_SPLITMVMODE_NONE;
517     if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
518         mb->mode = VP8_MVMODE_MV;
520         /* If we have three distinct MVs, merge first and last if they're the same */
521         if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
522             cnt[CNT_NEAREST] += 1;
524         /* Swap near and nearest if necessary */
525         if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
526             FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
527             FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
530         if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
531             if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
533                 /* Choose the best mv out of 0,0 and the nearest mv */
534                 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                /* Split-MV context: how many neighbors used SPLIT (top and
                 * left weighted double). */
535                 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
536                                     (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
537                                     (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
539                 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
540                     mb->mode = VP8_MVMODE_SPLIT;
                    /* MB-level MV becomes the last decoded sub-MV. */
541                     mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
543                     mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
544                     mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
548                 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
552             clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
556         mb->mode = VP8_MVMODE_ZERO;
/**
 * Decode the sixteen 4x4 intra prediction modes of an I4x4 macroblock.
 * Keyframes use context-dependent probabilities from the top/left modes;
 * inter frames use a fixed probability table.
 * NOTE(review): excerpt is missing lines (braces, declarations of
 * x/y/i/ctx, the keyframe/inter branch structure).
 */
562 static av_always_inline
563 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
564                            int mb_x, int keyframe)
566     uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
        /* Keyframe path: context from the mode above and to the left. */
569         uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x;
570         uint8_t* const left = s->intra4x4_pred_mode_left;
571         for (y = 0; y < 4; y++) {
572             for (x = 0; x < 4; x++) {
574                 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
575                 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                /* Decoded mode becomes the neighbor context for later blocks. */
576                 left[y] = top[x] = *intra4x4;
        /* Inter-frame path: flat probabilities, no neighbor context. */
582         for (i = 0; i < 16; i++)
583             intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
/**
 * Decode the per-macroblock header: segment id, skip flag, intra/inter
 * decision, prediction modes, reference frame and (for inter) the MVs.
 * @param segment in/out per-MB segment id (predicted from *ref when the
 *                map is not updated this frame)
 * NOTE(review): excerpt is missing lines (braces, keyframe/inter branch
 * structure); visible lines kept verbatim.
 */
587 static av_always_inline
588 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment, uint8_t *ref)
590     VP56RangeCoder *c = &s->c;
592     if (s->segmentation.update_map)
593         *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
595         *segment = ref ? *ref : *segment;
596     s->segment = *segment;
598     mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
        /* Keyframe: 16x16 intra mode from the keyframe-specific tree. */
601         mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
603         if (mb->mode == MODE_I4x4) {
604             decode_intra4x4_modes(s, c, mb_x, 1);
            /* Non-I4x4: replicate the 16x16 mode into all 4x4 contexts. */
606             const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
607             AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
608             AV_WN32A(s->intra4x4_pred_mode_left, modes);
611         s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
612         mb->ref_frame = VP56_FRAME_CURRENT;
613     } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        /* Inter macroblock: pick the reference frame... */
615         if (vp56_rac_get_prob_branchy(c, s->prob->last))
616             mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
617                 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
619             mb->ref_frame = VP56_FRAME_PREVIOUS;
        /* ...track per-reference usage for prefetch heuristics. */
620         s->ref_count[mb->ref_frame-1]++;
622         // motion vectors, 16.3
623         decode_mvs(s, mb, mb_x, mb_y);
        /* Intra macroblock in an inter frame: adaptive probabilities. */
626         mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
628         if (mb->mode == MODE_I4x4)
629             decode_intra4x4_modes(s, c, mb_x, 0);
631         s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
632         mb->ref_frame = VP56_FRAME_CURRENT;
633         mb->partitioning = VP8_SPLITMVMODE_NONE;
634         AV_ZERO32(&mb->bmv[0]);
638 #ifndef decode_block_coeffs_internal
640  * @param c arithmetic bitstream reader context
641  * @param block destination for block coefficients
642  * @param probs probabilities to use when reading trees from the bitstream
643  * @param i initial coeff index, 0 unless a separate DC block is coded
644  * @param zero_nhood the initial prediction context for number of surrounding
645  *                   all-zero blocks (only left/top, so 0-2)
646  * @param qmul array holding the dc/ac dequant factor at position 0/1
647  * @return 0 if no coeffs were decoded
648  *         otherwise, the index of the last coeff decoded plus one
/* Token-tree coefficient decoding (spec ch. 13); guarded by #ifndef so an
 * arch-optimized version can replace it. NOTE(review): excerpt is missing
 * lines (the decoding loop header, `coeff` declaration, several returns). */
650 static int decode_block_coeffs_internal(VP56RangeCoder *c, DCTELEM block[16],
651                                         uint8_t probs[16][3][NUM_DCT_TOKENS-1],
652                                         int i, uint8_t *token_prob, int16_t qmul[2])
657         if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
661         if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
663                 return i; // invalid input; blocks should end with EOB
664             token_prob = probs[i][0];
668         if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
670             token_prob = probs[i+1][1];
672             if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
673                 coeff = vp56_rac_get_prob_branchy(c, token_prob[4]);
675                 coeff += vp56_rac_get_prob(c, token_prob[5]);
679                 if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
680                     if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
681                         coeff  = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
                        /* DCT_CAT2: two extra probability-coded bits. */
684                         coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
685                         coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
687                 } else { // DCT_CAT3 and up
688                     int a = vp56_rac_get_prob(c, token_prob[8]);
689                     int b = vp56_rac_get_prob(c, token_prob[9+a]);
690                     int cat = (a<<1) + b;
691                     coeff  = 3 + (8<<cat);
692                     coeff += vp8_rac_get_coeff(c, ff_vp8_dct_cat_prob[cat]);
695             token_prob = probs[i+1][2];
        /* Sign bit, dequantize (DC factor for i==0, AC otherwise), store
         * in zigzag order. */
697         block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
/* Thin wrapper: handle the common EOB-at-start case inline (returns on a
 * line not visible here), otherwise defer to the full token decoder. */
704 static av_always_inline
705 int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
706                         uint8_t probs[16][3][NUM_DCT_TOKENS-1],
707                         int i, int zero_nhood, int16_t qmul[2])
709     uint8_t *token_prob = probs[i][zero_nhood];
710     if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
712     return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
/**
 * Decode all DCT coefficients for one macroblock: optional Y2 (luma DC)
 * block plus 16 luma and 8 chroma 4x4 blocks, updating the top/left
 * non-zero contexts used for prediction.
 * NOTE(review): excerpt is missing lines (braces, `block_dc` declaration,
 * the skip handling at the end).
 */
715 static av_always_inline
716 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
717                       uint8_t t_nnz[9], uint8_t l_nnz[9])
719     int i, x, y, luma_start = 0, luma_ctx = 3;
720     int nnz_pred, nnz, nnz_total = 0;
721     int segment = s->segment;
    /* Modes other than I4x4/SPLIT code a separate Y2 (luma DC) block. */
724     if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
725         nnz_pred = t_nnz[8] + l_nnz[8];
727         // decode DC values and do hadamard
728         nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
729                                   s->qmat[segment].luma_dc_qmul);
730         l_nnz[8] = t_nnz[8] = !!nnz;
        /* Inverse Walsh-Hadamard: fast DC-only path vs. full transform. */
735             s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
737             s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
    // luma blocks
744     for (y = 0; y < 4; y++)
745         for (x = 0; x < 4; x++) {
746             nnz_pred = l_nnz[y] + t_nnz[x];
747             nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
748                                       nnz_pred, s->qmat[segment].luma_qmul);
749             // nnz+block_dc may be one more than the actual last index, but we don't care
750             s->non_zero_count_cache[y][x] = nnz + block_dc;
751             t_nnz[x] = l_nnz[y] = !!nnz;
756     // TODO: what to do about dimensions? 2nd dim for luma is x,
757     // but for chroma it's (y<<1)|x
758     for (i = 4; i < 6; i++)
759         for (y = 0; y < 2; y++)
760             for (x = 0; x < 2; x++) {
761                 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
762                 nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
763                                           nnz_pred, s->qmat[segment].chroma_qmul);
764                 s->non_zero_count_cache[i][(y<<1)+x] = nnz;
765                 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
769     // if there were no coded coeffs despite the macroblock not being marked skip,
770     // we MUST not do the inner loop filter and should not do IDCT
771     // Since skip isn't used for bitstream prediction, just manually set it.
/* Save the bottom row of this macroblock (Y, and Cb/Cr unless simple
 * filtering) into the top_border scratch line for the row below. */
776 static av_always_inline
777 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
778                       int linesize, int uvlinesize, int simple)
780     AV_COPY128(top_border, src_y + 15*linesize);
782         AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
783         AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
/**
 * Exchange (or copy) the rows above the current macroblock with the saved
 * top_border line, so intra prediction sees the pre-deblocking pixels.
 * With xchg=1 data is swapped in; with xchg=0 it is copied back out.
 * NOTE(review): excerpt is missing lines (braces, src_y adjustment, the
 * end of the XCHG macro).
 */
787 static av_always_inline
788 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
789                     int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
790                     int simple, int xchg)
792     uint8_t *top_border_m1 = top_border-32;     // for TL prediction
794     src_cb -= uvlinesize;
795     src_cr -= uvlinesize;
797 #define XCHG(a,b,xchg) do {                     \
798         if (xchg) AV_SWAP64(b,a);               \
799         else      AV_COPY64(b,a);               \
802     XCHG(top_border_m1+8, src_y-8, xchg);
803     XCHG(top_border,      src_y,   xchg);
    /* Right half of the luma row and top-right neighbor are always copied. */
804     XCHG(top_border+8,    src_y+8, 1);
805     if (mb_x < mb_width-1)
806         XCHG(top_border+32, src_y+16, 1);
808     // only copy chroma for normal loop filter
809     // or to initialize the top row to 127
810     if (!simple || !mb_y) {
811         XCHG(top_border_m1+16, src_cb-8, xchg);
812         XCHG(top_border_m1+24, src_cr-8, xchg);
813         XCHG(top_border+16, src_cb, 1);
814         XCHG(top_border+24, src_cr, 1);
/* Adjust a DC 8x8/16x16 prediction mode at frame edges where top/left
 * neighbors are unavailable (edge conditions are on lines not visible). */
818 static av_always_inline
819 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
822             return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
824             return mb_y ? mode : LEFT_DC_PRED8x8;
/* Adjust a TM 8x8/16x16 prediction mode at frame edges (edge conditions
 * are on lines not visible in this excerpt). */
828 static av_always_inline
829 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
832         return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
834         return mb_y ? mode : HOR_PRED8x8;
/* Fix up an 8x8/16x16 intra mode for edge macroblocks (non-emu-edge
 * variant); only the DC case is visible in this excerpt. */
838 static av_always_inline
839 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
841     if (mode == DC_PRED8x8) {
842         return check_dc_pred8x8_mode(mode, mb_x, mb_y);
/* Fix up an 8x8/16x16 intra mode when CODEC_FLAG_EMU_EDGE is set: edge
 * pixels are not padded, so substitute DC_127/DC_129 variants.
 * NOTE(review): the switch header and some case labels are not visible. */
848 static av_always_inline
849 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
853         return check_dc_pred8x8_mode(mode, mb_x, mb_y);
855         return !mb_y ? DC_127_PRED8x8 : mode;
857         return !mb_x ? DC_129_PRED8x8 : mode;
858     case PLANE_PRED8x8 /*TM*/:
859         return check_tm_pred8x8_mode(mode, mb_x, mb_y);
/* 4x4 analogue of check_tm_pred8x8_mode: substitute VERT/HOR/DC_129 when
 * the TM predictor's neighbors are missing at frame edges. */
864 static av_always_inline
865 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
868         return mb_y ? VERT_VP8_PRED : DC_129_PRED;
870         return mb_y ? mode : HOR_VP8_PRED;
/* Fix up a 4x4 intra mode for emu-edge decoding; *copy_buf is set (on
 * lines not visible here) when the predictor needs edge pixels copied
 * into a temporary buffer first. */
874 static av_always_inline
875 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
884     case DIAG_DOWN_LEFT_PRED:
886         return !mb_y ? DC_127_PRED : mode;
894         return !mb_x ? DC_129_PRED : mode;
896         return check_tm_pred4x4_mode(mode, mb_x, mb_y);
897     case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
898     case DIAG_DOWN_RIGHT_PRED:
899     case VERT_RIGHT_PRED:
/**
 * Perform intra prediction and add residuals for one macroblock:
 * 16x16 (or sixteen 4x4) luma prediction plus 8x8 chroma prediction,
 * with border exchange before/after so prediction sees unfiltered pixels.
 * NOTE(review): excerpt is missing many lines (braces, copy-buffer edge
 * setup, the else-branches of several conditionals).
 */
908 static av_always_inline
909 void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
912     AVCodecContext *avctx = s->avctx;
913     int x, y, mode, nnz, tr;
915     // for the first row, we need to run xchg_mb_border to init the top edge to 127
916     // otherwise, skip it if we aren't going to deblock
917     if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
918         xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
919                        s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
920                        s->filter.simple, 1);
    /* Whole-block 16x16 prediction for all modes below MODE_I4x4. */
922     if (mb->mode < MODE_I4x4) {
923         if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
924             mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
926             mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
928         s->hpc.pred16x16[mode](dst[0], s->linesize);
        /* I4x4 path: predict and reconstruct each 4x4 sub-block. */
930         uint8_t *ptr = dst[0];
931         uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
932         uint8_t tr_top[4] = { 127, 127, 127, 127 };
934         // all blocks on the right edge of the macroblock use bottom edge
935         // the top macroblock for their topright edge
936         uint8_t *tr_right = ptr - s->linesize + 16;
938         // if we're on the right edge of the frame, said edge is extended
939         // from the top macroblock
940         if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
941             mb_x == s->mb_width-1) {
942             tr = tr_right[-1]*0x01010101;
943             tr_right = (uint8_t *)&tr;
947             AV_ZERO128(s->non_zero_count_cache);
949         for (y = 0; y < 4; y++) {
950             uint8_t *topright = ptr + 4 - s->linesize;
951             for (x = 0; x < 4; x++) {
952                 int copy = 0, linesize = s->linesize;
953                 uint8_t *dst = ptr+4*x;
954                 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
956                 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
961                 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
962                     mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
                    /* copy_buf path: build a 5x8 working block with the
                     * needed top/left edge samples (127 padding when the
                     * real neighbors are unavailable). */
968                             AV_WN32A(copy_dst+4, 127U * 0x01010101U);
970                             AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
974                                 copy_dst[3] = ptr[4*x-s->linesize-1];
983                             copy_dst[11] = ptr[4*x              -1];
984                             copy_dst[19] = ptr[4*x+s->linesize  -1];
985                             copy_dst[27] = ptr[4*x+s->linesize*2-1];
986                             copy_dst[35] = ptr[4*x+s->linesize*3-1];
992                 s->hpc.pred4x4[mode](dst, topright, linesize);
                /* Copy the predicted 4x4 block back out of the temp buffer. */
994                     AV_COPY32(ptr+4*x              , copy_dst+12);
995                     AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
996                     AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
997                     AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
                /* Add the residual: DC-only fast path vs. full IDCT. */
1000                 nnz = s->non_zero_count_cache[y][x];
1003                         s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
1005                         s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
1010             ptr += 4*s->linesize;
    /* Chroma: one 8x8 prediction applied to both Cb and Cr planes. */
1015     if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1016         mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode, mb_x, mb_y);
1018         mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x, mb_y);
1020     s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1021     s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
    /* Copy the exchanged border pixels back (xchg=0). */
1023     if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
1024         xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1025                        s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1026                        s->filter.simple, 0);
/* Sub-pixel interpolation edge requirements, indexed by the 3-bit
 * fractional MV position (0 = full-pel, no extra pixels needed). */
1029 static const uint8_t subpel_idx[3][8] = {
1030     { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1031                                 // also function pointer index
1032     { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1033     { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1037  * Generic MC function.
1039  * @param s VP8 decoding context
1040  * @param luma 1 for luma (Y) planes, 0 for chroma (Cb/Cr) planes
1041  * @param dst target buffer for block data at block position
1042  * @param src reference picture buffer at origin (0, 0)
1043  * @param mv motion vector (relative to block position) to get pixel data from
1044  * @param x_off horizontal position of block from origin (0, 0)
1045  * @param y_off vertical position of block from origin (0, 0)
1046  * @param block_w width of block (16, 8 or 4)
1047  * @param block_h height of block (always same as block_w)
1048  * @param width width of src/dst plane data
1049  * @param height height of src/dst plane data
1050  * @param linesize size of a single line of plane data, including padding
1051  * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
/* Luma motion compensation with quarter-pel MVs; falls back to
 * emulated_edge_mc when the reference block overlaps the frame edge.
 * NOTE(review): excerpt is missing lines (braces, the full-pel else path
 * structure). */
1053 static av_always_inline
1054 void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv,
1055                  int x_off, int y_off, int block_w, int block_h,
1056                  int width, int height, int linesize,
1057                  vp8_mc_func mc_func[3][3])
1059     uint8_t *src = ref->data[0];
        /* Fractional part (eighth-pel units, luma MV is quarter-pel so <<1)
         * selects the filter; integer part offsets the source position. */
1063         int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1064         int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
1066         x_off += mv->x >> 2;
1067         y_off += mv->y >> 2;
        /* Frame-threading: wait until the reference rows we read are done. */
1070         ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1071         src += y_off * linesize + x_off;
1072         if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1073             y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1074             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
1075                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1076                                     x_off - mx_idx, y_off - my_idx, width, height);
1077             src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1079         mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
        /* Full-pel MV: plain copy via the [0][0] function. */
1081         ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1082         mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
/* Chroma motion compensation: same scheme as vp8_mc_luma but with
 * eighth-pel MVs and both Cb/Cr planes handled together (they share the
 * single edge_emu_buffer, hence the sequential emulate-then-filter calls).
 * NOTE(review): excerpt is missing lines (braces and else structure). */
1086 static av_always_inline
1087 void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref,
1088                    const VP56mv *mv, int x_off, int y_off,
1089                    int block_w, int block_h, int width, int height, int linesize,
1090                    vp8_mc_func mc_func[3][3])
1092     uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
1095         int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1096         int my = mv->y&7, my_idx = subpel_idx[0][my];
1098         x_off += mv->x >> 3;
1099         y_off += mv->y >> 3;
1102         src1 += y_off * linesize + x_off;
1103         src2 += y_off * linesize + x_off;
1104         ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1105         if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1106             y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            /* Cb through the edge buffer... */
1107             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
1108                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1109                                     x_off - mx_idx, y_off - my_idx, width, height);
1110             src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1111             mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            /* ...then Cr, reusing the same buffer. */
1113             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
1114                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1115                                     x_off - mx_idx, y_off - my_idx, width, height);
1116             src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1117             mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1119             mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1120             mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        /* Full-pel path. */
1123         ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1124         mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1125         mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
/* Motion-compensate one partition of a macroblock: luma block at
 * (bx_off, by_off) plus the corresponding half-size chroma blocks.
 * NOTE(review): excerpt is missing lines (braces, the uvmv derivation —
 * only the profile-3 special case is visible). */
1129 static av_always_inline
1130 void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
1131                  AVFrame *ref_frame, int x_off, int y_off,
1132                  int bx_off, int by_off,
1133                  int block_w, int block_h,
1134                  int width, int height, VP56mv *mv)
1139     vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
1140                 ref_frame, mv, x_off + bx_off, y_off + by_off,
1141                 block_w, block_h, width, height, s->linesize,
1142                 s->put_pixels_tab[block_w == 8]);
    /* Profile 3: chroma MVs are handled specially (full derivation is on
     * lines not visible in this excerpt). */
1145     if (s->profile == 3) {
    /* Chroma planes are half-resolution in both dimensions (4:2:0). */
1149     x_off   >>= 1; y_off   >>= 1;
1150     bx_off  >>= 1; by_off  >>= 1;
1151     width   >>= 1; height  >>= 1;
1152     block_w >>= 1; block_h >>= 1;
1153     vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
1154                   dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1155                   &uvmv, x_off + bx_off, y_off + by_off,
1156                   block_w, block_h, width, height, s->uvlinesize,
1157                   s->put_pixels_tab[1 + (block_w == 4)]);
1160 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1161  * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
/* ref is a VP56_FRAME_* index into s->framep[]; mb_xy is the flat MB index. */
1162 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1164 /* Don't prefetch refs that haven't been used very often this frame. */
1165 if (s->ref_count[ref-1] > (mb_xy >> 5)) {
1166 int x_off = mb_x << 4, y_off = mb_y << 4;
/* rough full-pel target: MV scaled down (>>2) plus MB position; the +8/+64
 * terms bias the address toward upcoming cache lines */
1167 int mx = (mb->mv.x>>2) + x_off + 8;
1168 int my = (mb->mv.y>>2) + y_off;
1169 uint8_t **src= s->framep[ref]->data;
/* (mb_x&3)*4 staggers the prefetch rows across consecutive macroblocks */
1170 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1171 /* For threading, a ff_thread_await_progress here might be useful, but
1172  * it actually slows down the decoder. Since a bad prefetch doesn't
1173  * generate bad decoder output, we don't run it here. */
1174 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* chroma: half-resolution address; stride src[2]-src[1] covers both planes */
1175 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1176 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1181  * Apply motion vectors to prediction buffer, chapter 18.
1183 static av_always_inline
1184 void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
1187 int x_off = mb_x << 4, y_off = mb_y << 4;
1188 int width = 16*s->mb_width, height = 16*s->mb_height;
/* reference frame selected by the macroblock (last / golden / altref) */
1189 AVFrame *ref = s->framep[mb->ref_frame];
1190 VP56mv *bmv = mb->bmv;
1192 switch (mb->partitioning) {
/* whole 16x16 macroblock moved by a single MV */
1193 case VP8_SPLITMVMODE_NONE:
1194 vp8_mc_part(s, dst, ref, x_off, y_off,
1195 0, 0, 16, 16, width, height, &mb->mv);
/* sixteen 4x4 luma sub-blocks, each with its own MV from mb->bmv[] */
1197 case VP8_SPLITMVMODE_4x4: {
1202 for (y = 0; y < 4; y++) {
1203 for (x = 0; x < 4; x++) {
1204 vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
1206 4*x + x_off, 4*y + y_off, 4, 4,
1207 width, height, s->linesize,
1208 s->put_pixels_tab[2]);
/* chroma: each 4x4 chroma block covers a 2x2 group of luma sub-blocks;
 * its MV is the rounded average of those four luma MVs */
1213 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1214 for (y = 0; y < 2; y++) {
1215 for (x = 0; x < 2; x++) {
1216 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1217 mb->bmv[ 2*y * 4 + 2*x+1].x +
1218 mb->bmv[(2*y+1) * 4 + 2*x ].x +
1219 mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1220 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1221 mb->bmv[ 2*y * 4 + 2*x+1].y +
1222 mb->bmv[(2*y+1) * 4 + 2*x ].y +
1223 mb->bmv[(2*y+1) * 4 + 2*x+1].y;
/* divide the sum by 4 with round-to-nearest; the sign-bit shift term
 * makes the rounding symmetric for negative sums */
1224 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1225 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
/* profile 3: reduced chroma MV precision (branch body elided here) */
1226 if (s->profile == 3) {
1230 vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
1231 dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1232 4*x + x_off, 4*y + y_off, 4, 4,
1233 width, height, s->uvlinesize,
1234 s->put_pixels_tab[2]);
/* two 16x8 halves, stacked vertically */
1239 case VP8_SPLITMVMODE_16x8:
1240 vp8_mc_part(s, dst, ref, x_off, y_off,
1241 0, 0, 16, 8, width, height, &bmv[0]);
1242 vp8_mc_part(s, dst, ref, x_off, y_off,
1243 0, 8, 16, 8, width, height, &bmv[1]);
/* two 8x16 halves, side by side */
1245 case VP8_SPLITMVMODE_8x16:
1246 vp8_mc_part(s, dst, ref, x_off, y_off,
1247 0, 0, 8, 16, width, height, &bmv[0]);
1248 vp8_mc_part(s, dst, ref, x_off, y_off,
1249 8, 0, 8, 16, width, height, &bmv[1]);
/* four 8x8 quadrants */
1251 case VP8_SPLITMVMODE_8x8:
1252 vp8_mc_part(s, dst, ref, x_off, y_off,
1253 0, 0, 8, 8, width, height, &bmv[0]);
1254 vp8_mc_part(s, dst, ref, x_off, y_off,
1255 8, 0, 8, 8, width, height, &bmv[1]);
1256 vp8_mc_part(s, dst, ref, x_off, y_off,
1257 0, 8, 8, 8, width, height, &bmv[2]);
1258 vp8_mc_part(s, dst, ref, x_off, y_off,
1259 8, 8, 8, 8, width, height, &bmv[3]);
/* Inverse-transform the decoded residuals and add them onto the prediction
 * in dst[] (luma then both chroma planes), one 4x4 block at a time.
 * s->non_zero_count_cache holds one byte per 4x4 block; a value of 1 means
 * only the DC coefficient is non-zero, >1 means a full IDCT is needed. */
1264 static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
/* MODE_I4x4 blocks had their residuals added during intra prediction,
 * so only the remaining modes are handled here */
1268 if (mb->mode != MODE_I4x4) {
1269 uint8_t *y_dst = dst[0];
1270 for (y = 0; y < 4; y++) {
/* four per-block nnz bytes for this row of 4x4 blocks, packed LE */
1271 uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
/* any byte > 1? then at least one block needs the full IDCT */
1273 if (nnz4&~0x01010101) {
1274 for (x = 0; x < 4; x++) {
1275 if ((uint8_t)nnz4 == 1)
1276 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
1277 else if((uint8_t)nnz4 > 1)
1278 s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
/* fast path: all four blocks are DC-only, add them in one call */
1284 s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
1287 y_dst += 4*s->linesize;
/* chroma: same scheme for the U and V planes (2x2 blocks each) */
1291 for (ch = 0; ch < 2; ch++) {
1292 uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
1294 uint8_t *ch_dst = dst[1+ch];
1295 if (nnz4&~0x01010101) {
1296 for (y = 0; y < 2; y++) {
1297 for (x = 0; x < 2; x++) {
1298 if ((uint8_t)nnz4 == 1)
1299 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
1300 else if((uint8_t)nnz4 > 1)
1301 s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
/* all four chroma blocks consumed — skip the DC-only fast path */
1304 goto chroma_idct_end;
1306 ch_dst += 4*s->uvlinesize;
/* DC-only fast path for the whole chroma plane of this MB */
1309 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
/* Compute the loop-filter strength for one macroblock and store it in *f.
 * Starts from the frame-level (or per-segment) filter level, applies the
 * reference-frame/mode deltas, clips to [0,63], and derives the interior
 * (sub-block edge) limit from the sharpness setting. */
1316 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
1318 int interior_limit, filter_level;
1320 if (s->segmentation.enabled) {
/* per-segment level; when not absolute it is a delta on the frame level */
1321 filter_level = s->segmentation.filter_level[s->segment];
1322 if (!s->segmentation.absolute_vals)
1323 filter_level += s->filter.level;
1325 filter_level = s->filter.level;
/* loop-filter deltas keyed on reference frame and prediction mode */
1327 if (s->lf_delta.enabled) {
1328 filter_level += s->lf_delta.ref[mb->ref_frame];
1329 filter_level += s->lf_delta.mode[mb->mode];
1332 /* Like av_clip for inputs 0 and max, where max is equal to (2^n-1) */
/* NOTE(review): the macro carries a stray trailing ';' — harmless because it
 * is only used as a full statement below, but it would break in an
 * expression context */
1333 #define POW2CLIP(x,max) (((x) & ~max) ? (-(x))>>31 & max : (x));
1334 filter_level = POW2CLIP(filter_level, 63);
1336 interior_limit = filter_level;
/* sharpness softens the interior limit, but never below 1 */
1337 if (s->filter.sharpness) {
1338 interior_limit >>= (s->filter.sharpness + 3) >> 2;
1339 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1341 interior_limit = FFMAX(interior_limit, 1);
1343 f->filter_level = filter_level;
1344 f->inner_limit = interior_limit;
/* inner (sub-block) edges are filtered unless the MB is skipped with a
 * whole-block prediction mode (no residual at sub-block boundaries) */
1345 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
/* Apply the normal (non-simple) in-loop deblocking filter to one macroblock:
 * macroblock edges use the stronger mbedge limit + high-edge-variance
 * threshold, interior 4-pixel edges use the weaker bedge limit. */
1348 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
1350 int mbedge_lim, bedge_lim, hev_thresh;
1351 int filter_level = f->filter_level;
1352 int inner_limit = f->inner_limit;
1353 int inner_filter = f->inner_filter;
1354 int linesize = s->linesize;
1355 int uvlinesize = s->uvlinesize;
/* hev threshold by filter level; row [0] is for keyframes... NOTE(review):
 * indexed with s->keyframe below, so row [1] appears to be the keyframe
 * table — confirm against the full table (tail rows elided here) */
1356 static const uint8_t hev_thresh_lut[2][64] = {
1357 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1358 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1359 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1361 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1362 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1363 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* edge limits derived from the per-MB level and interior limit */
1370 bedge_lim = 2*filter_level + inner_limit;
1371 mbedge_lim = bedge_lim + 4;
1373 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
/* left macroblock edge (horizontal filtering), luma + chroma */
1376 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
1377 mbedge_lim, inner_limit, hev_thresh);
1378 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
1379 mbedge_lim, inner_limit, hev_thresh);
/* interior vertical edges at x = 4, 8, 12 (luma) and x = 4 (chroma) */
1383 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
1384 inner_limit, hev_thresh);
1385 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
1386 inner_limit, hev_thresh);
1387 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
1388 inner_limit, hev_thresh);
1389 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
1390 uvlinesize, bedge_lim,
1391 inner_limit, hev_thresh);
/* top macroblock edge (vertical filtering), luma + chroma */
1395 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
1396 mbedge_lim, inner_limit, hev_thresh);
1397 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
1398 mbedge_lim, inner_limit, hev_thresh);
/* interior horizontal edges at y = 4, 8, 12 (luma) and y = 4 (chroma) */
1402 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
1403 linesize, bedge_lim,
1404 inner_limit, hev_thresh);
1405 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
1406 linesize, bedge_lim,
1407 inner_limit, hev_thresh);
1408 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
1409 linesize, bedge_lim,
1410 inner_limit, hev_thresh);
1411 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
1412 dst[2] + 4 * uvlinesize,
1413 uvlinesize, bedge_lim,
1414 inner_limit, hev_thresh);
/* Apply the "simple" in-loop deblocking filter to one macroblock.
 * The simple filter operates on the luma plane only (dst is dst[0]);
 * no high-edge-variance threshold is used. */
1418 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
1420 int mbedge_lim, bedge_lim;
1421 int filter_level = f->filter_level;
1422 int inner_limit = f->inner_limit;
1423 int inner_filter = f->inner_filter;
1424 int linesize = s->linesize;
/* same limit derivation as the normal filter */
1429 bedge_lim = 2*filter_level + inner_limit;
1430 mbedge_lim = bedge_lim + 4;
/* left macroblock edge, then interior vertical edges at x = 4, 8, 12 */
1433 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
1435 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1436 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1437 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
/* top macroblock edge, then interior horizontal edges at y = 4, 8, 12 */
1441 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
1443 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1444 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1445 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
/* Run the normal loop filter over one row of macroblocks in curframe,
 * using the per-MB strengths precomputed in s->filter_strength.
 * The top border of each MB is saved first so the next row can still
 * intra-predict from the unfiltered pixels. */
1449 static void filter_mb_row(VP8Context *s, AVFrame *curframe, int mb_y)
1451 VP8FilterStrength *f = s->filter_strength;
1453 curframe->data[0] + 16*mb_y*s->linesize,
1454 curframe->data[1] + 8*mb_y*s->uvlinesize,
1455 curframe->data[2] + 8*mb_y*s->uvlinesize
1459 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
/* stash unfiltered top-border pixels before filtering overwrites them */
1460 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1461 filter_mb(s, dst, f++, mb_x, mb_y);
/* Simple-filter counterpart of filter_mb_row(): luma-only, so only
 * dst[0] is passed and chroma border backup is skipped. */
1468 static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y)
1470 VP8FilterStrength *f = s->filter_strength;
1471 uint8_t *dst = curframe->data[0] + 16*mb_y*s->linesize;
1474 for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
/* save unfiltered top-border luma for next-row intra prediction */
1475 backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
1476 filter_mb_simple(s, dst, f++, mb_x, mb_y);
/* Decode one VP8 frame from avpkt into an AVFrame.
 * Top-level flow: parse the frame header, pick/allocate a frame buffer,
 * publish the updated reference-frame pointers for frame threading, then
 * decode macroblock rows (mode parsing, prediction, residual IDCT,
 * loop filter), reporting per-row progress to consumer threads.
 * Returns a negative AVERROR on failure. */
1481 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
1484 VP8Context *s = avctx->priv_data;
1485 int ret, mb_x, mb_y, i, y, referenced;
1486 enum AVDiscard skip_thresh;
1487 AVFrame *av_uninit(curframe), *prev_frame = s->framep[VP56_FRAME_CURRENT];
1489 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
/* a frame is "referenced" if any of last/golden/altref will point at it */
1492 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
1493 || s->update_altref == VP56_FRAME_CURRENT;
/* discard category this frame falls into, for skip_frame/skip_loop_filter */
1495 skip_thresh = !referenced ? AVDISCARD_NONREF :
1496 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
1498 if (avctx->skip_frame >= skip_thresh) {
1502 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
1504 // release no longer referenced frames
1505 for (i = 0; i < 5; i++)
1506 if (s->frames[i].data[0] &&
1507 &s->frames[i] != prev_frame &&
1508 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1509 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1510 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1511 ff_thread_release_buffer(avctx, &s->frames[i]);
1513 // find a free buffer
/* the pool has 5 frames: current + prev + golden + altref + one spare */
1514 for (i = 0; i < 5; i++)
1515 if (&s->frames[i] != prev_frame &&
1516 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1517 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1518 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1519 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
1523 av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
1526 if (curframe->data[0])
1527 ff_thread_release_buffer(avctx, curframe);
1529 curframe->key_frame = s->keyframe;
1530 curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1531 curframe->reference = referenced ? 3 : 0;
/* stash the segmentation map with the frame so the next frame's decode
 * thread can read the previous map (see the await_progress below) */
1532 curframe->ref_index[0] = s->segmentation_map;
1533 if ((ret = ff_thread_get_buffer(avctx, curframe))) {
1534 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
1538 // check if golden and altref are swapped
1539 if (s->update_altref != VP56_FRAME_NONE) {
1540 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
1542 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
1544 if (s->update_golden != VP56_FRAME_NONE) {
1545 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
1547 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
1549 if (s->update_last) {
1550 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
1552 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
1554 s->next_framep[VP56_FRAME_CURRENT] = curframe;
/* frame-threading: setup is done, the next decode thread may start */
1556 ff_thread_finish_setup(avctx);
1558 // Given that arithmetic probabilities are updated every frame, it's quite likely
1559 // that the values we have on a random interframe are complete junk if we didn't
1560 // start decode on a keyframe. So just don't display anything rather than junk.
1561 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1562 !s->framep[VP56_FRAME_GOLDEN] ||
1563 !s->framep[VP56_FRAME_GOLDEN2])) {
1564 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1565 return AVERROR_INVALIDDATA;
1568 s->linesize = curframe->linesize[0];
1569 s->uvlinesize = curframe->linesize[1];
/* 21 rows: enough for a 16-line MB plus sub-pel filter margins */
1571 if (!s->edge_emu_buffer)
1572 s->edge_emu_buffer = av_malloc(21*s->linesize);
1574 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1576 /* Zero macroblock structures for top/top-left prediction from outside the frame. */
1577 memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
1579 // top edge of 127 for intra prediction
1580 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1581 s->top_border[0][15] = s->top_border[0][23] = 127;
1582 memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
1584 memset(s->ref_count, 0, sizeof(s->ref_count));
1586 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
/* MV clamp margin around the frame; mv_min/mv_max appear to be in the MV's
 * sub-pel units ((mb-1)<<6 spans the frame) — confirm against full source */
1588 #define MARGIN (16 << 2)
1589 s->mv_min.y = -MARGIN;
1590 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1592 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
/* coefficient partitions are cycled per MB row */
1593 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
1594 VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1595 int mb_xy = mb_y*s->mb_width;
1597 curframe->data[0] + 16*mb_y*s->linesize,
1598 curframe->data[1] + 8*mb_y*s->uvlinesize,
1599 curframe->data[2] + 8*mb_y*s->uvlinesize
1602 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
1603 memset(s->left_nnz, 0, sizeof(s->left_nnz));
1604 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1606 // left edge of 129 for intra prediction
1607 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1608 for (i = 0; i < 3; i++)
1609 for (y = 0; y < 16>>!!i; y++)
1610 dst[i][y*curframe->linesize[i]-1] = 129;
1611 if (mb_y == 1) // top left edge is also 129
1612 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
1615 s->mv_min.x = -MARGIN;
1616 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
/* wait until the previous frame's thread has decoded this row, since
 * decode_mb_mode may read its segmentation map below */
1617 if (prev_frame && s->segmentation.enabled && s->segmentation.update_map)
1618 ff_thread_await_progress(prev_frame, mb_y, 0);
1620 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1621 /* Prefetch the current frame, 4 MBs ahead */
1622 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1623 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1625 decode_mb_mode(s, mb, mb_x, mb_y, s->segmentation_map + mb_xy,
1626 prev_frame ? prev_frame->ref_index[0] + mb_xy : NULL);
1628 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
1631 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
/* modes <= MODE_I4x4 are intra; everything else is inter-predicted */
1633 if (mb->mode <= MODE_I4x4)
1634 intra_predict(s, dst, mb, mb_x, mb_y);
1636 inter_predict(s, dst, mb, mb_x, mb_y);
1638 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
1641 idct_mb(s, dst, mb);
1643 AV_ZERO64(s->left_nnz);
1644 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
1646 // Reset DC block predictors if they would exist if the mb had coefficients
1647 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
1649 s->top_nnz[mb_x][8] = 0;
/* record per-MB filter strength now; the filter itself runs per row */
1653 if (s->deblock_filter)
1654 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);
1656 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
1664 if (s->deblock_filter) {
1665 if (s->filter.simple)
1666 filter_mb_row_simple(s, curframe, mb_y);
1668 filter_mb_row(s, curframe, mb_y);
/* tell consumer threads this row of curframe is ready */
1673 ff_thread_report_progress(curframe, mb_y, 0);
1676 ff_thread_report_progress(curframe, INT_MAX, 0);
1678 // if future frames don't use the updated probabilities,
1679 // reset them to the values we saved
1680 if (!s->update_probabilities)
1681 s->prob[0] = s->prob[1];
/* commit the new reference-frame assignments computed above */
1683 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
/* invisible frames (altref-only updates) are decoded but not output */
1685 if (!s->invisible) {
1686 *(AVFrame*)data = *curframe;
1687 *data_size = sizeof(AVFrame);
/* One-time decoder initialization: output format and DSP/prediction
 * function tables. Per-frame buffers are allocated lazily elsewhere. */
1693 static av_cold int vp8_decode_init(AVCodecContext *avctx)
1695 VP8Context *s = avctx->priv_data;
/* VP8 is always 8-bit 4:2:0 */
1698 avctx->pix_fmt = PIX_FMT_YUV420P;
1700 dsputil_init(&s->dsp, avctx);
/* reuse the H.264 intra prediction functions (shared with VP8) */
1701 ff_h264_pred_init(&s->hpc, CODEC_ID_VP8);
1702 ff_vp8dsp_init(&s->vp8dsp);
/* Codec close callback: releases all frames and per-frame buffers
 * via the shared flush routine. */
1707 static av_cold int vp8_decode_free(AVCodecContext *avctx)
1709 vp8_decode_flush(avctx);
/* Frame-threading: initialize a per-thread copy of the decoder context
 * (body largely elided in this excerpt). */
1713 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
1715 VP8Context *s = avctx->priv_data;
/* Translate a frame pointer from the source context's frames[] array into
 * the equivalent slot of this context's frames[] (NULL stays NULL). */
1722 #define REBASE(pic) \
1723 pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
/* Frame-threading: copy inter-frame decoder state (probabilities,
 * segmentation, filter deltas, sign biases, reference frames) from the
 * previous thread's context into this one. */
1725 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
1727 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
/* take the saved probabilities if the source frame didn't persist its updates */
1729 s->prob[0] = s_src->prob[!s_src->update_probabilities];
1730 s->segmentation = s_src->segmentation;
1731 s->lf_delta = s_src->lf_delta;
1732 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
1734 memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
/* rebase the source's next-frame pointers onto this context's pool */
1735 s->framep[0] = REBASE(s_src->next_framep[0]);
1736 s->framep[1] = REBASE(s_src->next_framep[1]);
1737 s->framep[2] = REBASE(s_src->next_framep[2]);
1738 s->framep[3] = REBASE(s_src->next_framep[3]);
1743 AVCodec ff_vp8_decoder = {
1752 CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
1753 .flush = vp8_decode_flush,
1754 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
1755 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
1756 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),