/*
 * VP8 compatible video decoder
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Jason Garrett-Glaser
 * Copyright (C) 2012 Daniel Kang
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "libavutil/imgutils.h"

#include "rectangle.h"
static void free_buffers(VP8Context *s)
    for (i = 0; i < MAX_THREADS; i++) {
        pthread_cond_destroy(&s->thread_data[i].cond);
        pthread_mutex_destroy(&s->thread_data[i].lock);
        av_freep(&s->thread_data[i].filter_strength);
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
    if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                    ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
        ff_thread_release_buffer(s->avctx, &f->tf);
        return AVERROR(ENOMEM);
static void vp8_release_frame(VP8Context *s, VP8Frame *f)
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
    VP8Context *s = avctx->priv_data;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(s, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

static void vp8_decode_flush(AVCodecContext *avctx)
    vp8_decode_flush_impl(avctx, 0);
static int update_dimensions(VP8Context *s, int width, int height)
    AVCodecContext *avctx = s->avctx;

    if (width  != s->avctx->width ||
        height != s->avctx->height) {
        vp8_decode_flush_impl(s->avctx, 1);

        ret = ff_set_dimensions(s->avctx, width, height);

    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
    s->mb_height = (s->avctx->coded_height + 15) / 16;

    s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) * sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
    else // Sliced threading
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) * sizeof(*s->macroblocks));
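        /* note (assumption based on the allocation sizes above): the extra rows
         * and columns act as a guard border, so left/top neighbour lookups such
         * as mb[-1] or mb - s->mb_width - 1 stay inside the allocated array */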
    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength = av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);

    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        (!s->intra4x4_pred_mode_top && !s->mb_layout))
        return AVERROR(ENOMEM);

    s->macroblocks = s->macroblocks_base + 1;
static void parse_segment_info(VP8Context *s)
    VP56RangeCoder *c = &s->c;

    s->segmentation.update_map = vp8_rac_get(c);

    if (vp8_rac_get(c)) { // update segment feature data
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);

    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
static void update_lf_deltas(VP8Context *s)
    VP56RangeCoder *c = &s->c;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

                s->lf_delta.ref[i] = -s->lf_delta.ref[i];

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
    const uint8_t *sizes = buf;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    buf      += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);

    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
        int size = AV_RL24(sizes + 3 * i);
        if (buf_size - size < 0)

        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);

    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
static void get_quants(VP8Context *s)
    VP56RangeCoder *c = &s->c;

    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)

        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta,  7)];
        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,              7)];
        s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[i].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
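        /* worked example: a looked-up AC value of 100 becomes
         * (101581 * 100) >> 16 = 155, i.e. the ~155/100 scaling noted above */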
        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];

        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
 *
 * Intra frames update all 3 references
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
 * If the update (golden|altref) flag is set, it's updated with the current frame
 *     if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 *     1: VP56_FRAME_PREVIOUS
 *     2: update golden with altref, or update altref with golden
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
    VP56RangeCoder *c = &s->c;

        return VP56_FRAME_CURRENT;

    switch (vp8_rac_get_uint(c, 2)) {
        return VP56_FRAME_PREVIOUS;
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    return VP56_FRAME_NONE;
static void update_refs(VP8Context *s)
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, i, j, k, l, m, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    s->keyframe  = !(buf[0] & 1);
    s->profile   = (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;

        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
    else // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7*s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;

        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        width  = AV_RL16(buf+3) & 0x3fff;
        height = AV_RL16(buf+5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        for (i = 0; i < 4; i++)
            for (j = 0; j < 16; j++)
                memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                       sizeof(s->prob->token[i][j]));
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c,  vp8_pred8x8c_prob_inter,  sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc,       vp8_mv_default_prob,      sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta,     0, sizeof(s->lf_delta));

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf_size -= header_size;

            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height) {
        if ((ret = update_dimensions(s, width, height)) < 0)

    s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
    s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);

            for (i = 0; i < 4; i++)
                s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);

            for (i = 0; i < 3; i++)
                s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);

        // 17.2 MV probability update
        for (i = 0; i < 2; i++)
            for (j = 0; j < 19; j++)
                if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                    s->prob->mvc[i][j] = vp8_rac_get_nn(c);
static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
    dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
    dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
/**
 * Motion vector coding, 17.1.
 */
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
    if (vp56_rac_get_prob_branchy(c, p[0])) {
        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;

        for (i = 9; i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
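        /* bit 3 is transmitted last: when bits 4..9 are all zero it is implied
         * to be set (values 0..7 use the small-mv tree below), otherwise an
         * explicit probability-coded bit decides whether to add 8 */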
        if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))

        const uint8_t *ps = p+2;
        bit = vp56_rac_get_prob(c, *ps);
        bit = vp56_rac_get_prob(c, *ps);
        x  += vp56_rac_get_prob(c, *ps);

    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
        return vp8_submv_prob[4-!!left];
        return vp8_submv_prob[2];
    return vp8_submv_prob[1-!!left];
/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
                  *mbsplits_cur, *firstidx;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[-s->mb_width-1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv       = top_mb->bmv;

    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
            part_idx = VP8_SPLITMVMODE_8x8;
        part_idx = VP8_SPLITMVMODE_4x4;

    num          = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx];
    firstidx     = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        uint32_t left, above;
        const uint8_t *submv_prob;

            left  = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
            left  = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
                    mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
                    AV_ZERO32(&mb->bmv[n]);
                AV_WN32A(&mb->bmv[n], above);
            AV_WN32A(&mb->bmv[n], left);
static av_always_inline
void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
    VP8Macroblock *mb_edge[3] = { 0 /* top */,
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb - s->mb_width-1;
        mb_edge[2] = mb - s->mb_width-2;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
#define MV_EDGE_CHECK(n)\
        VP8Macroblock *edge = mb_edge[n];\
        int edge_ref = edge->ref_frame;\
        if (edge_ref != VP56_FRAME_CURRENT) {\
            uint32_t mv = AV_RN32A(&edge->mv);\
                if (cur_sign_bias != sign_bias[edge_ref]) {\
                    /* SWAR negate of the values in mv. */\
                    mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
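                    /* this is the "+1 per 16-bit lane" step of the negate        \
                     * (-v = ~v + 1): e.g. 0x0001FFFC -> 0x0002FFFD; the masking  \
                     * keeps the +1 carry from crossing between the two lanes */  \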
                if (!n || mv != AV_RN32A(&near_mv[idx]))\
                    AV_WN32A(&near_mv[++idx], mv);\
                cnt[idx] += 1 + (n != 2);\
                cnt[CNT_ZERO] += 1 + (n != 2);\

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t, cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {

                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
                    mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
        mb->mode = VP8_MVMODE_ZERO;
static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);

        uint8_t *const left = s->intra4x4_pred_mode_left;
            top = mb->intra4x4_pred_mode_top;
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y] = top[x] = *intra4x4;

        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout)
    VP56RangeCoder *c = &s->c;

    if (s->segmentation.update_map)
        *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
    else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
            const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
            if (s->mb_layout == 1)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A( s->intra4x4_pred_mode_left, modes);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
        mb->ref_frame = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
                VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame-1]++;

        // motion vectors, 16.3
        decode_mvs(s, mb, mb_x, mb_y, layout);

        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
        mb->ref_frame        = VP56_FRAME_CURRENT;
        mb->partitioning     = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
#ifndef decode_block_coeffs_internal
/**
 * @param r arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
                                        int i, uint8_t *token_prob, int16_t qmul[2])
    VP56RangeCoder c = *r;

        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB

        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            token_prob = probs[i+1][1];

            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);

                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                } else { // DCT_CAT3 and up
                    int a   = vp56_rac_get_prob(&c, token_prob[8]);
                    int b   = vp56_rac_get_prob(&c, token_prob[9+a]);
                    int cat = (a<<1) + b;
                    coeff   = 3 + (8<<cat);
                    coeff  += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
            token_prob = probs[i+1][2];
        block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
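        /* coefficients are stored in zigzag order; qmul[0] is the DC dequant
         * factor (i == 0), qmul[1] applies to every later (AC) index */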
/**
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
                        int i, int zero_nhood, int16_t qmul[2])
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
    return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
                      uint8_t t_nnz[9], uint8_t l_nnz[9])
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;

    if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
                                  s->qmat[segment].luma_dc_qmul);
        l_nnz[8] = t_nnz[8] = !!nnz;

                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);

    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
                                      nnz_pred, s->qmat[segment].luma_qmul);
            // nnz+block_dc may be one more than the actual last index, but we don't care
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;

    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
                nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
                                          nnz_pred, s->qmat[segment].chroma_qmul);
                td->non_zero_count_cache[i][(y<<1)+x] = nnz;
                t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                      int linesize, int uvlinesize, int simple)
    AV_COPY128(top_border, src_y + 15*linesize);
        AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
        AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                    int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
                    int simple, int xchg)
    uint8_t *top_border_m1 = top_border-32; // for TL prediction
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a,b,xchg) do { \
        if (xchg) AV_SWAP64(b,a); \
        else      AV_COPY64(b,a); \

    XCHG(top_border_m1+8, src_y-8, xchg);
    XCHG(top_border,      src_y,   xchg);
    XCHG(top_border+8,    src_y+8, 1);
    if (mb_x < mb_width-1)
        XCHG(top_border+32, src_y+16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1+16, src_cb-8, xchg);
        XCHG(top_border_m1+24, src_cr-8, xchg);
        XCHG(top_border+16,    src_cb,   1);
        XCHG(top_border+24,    src_cr,   1);
static av_always_inline
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
        return mb_y ? mode : LEFT_DC_PRED8x8;

static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
        return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
        return mb_y ? mode : HOR_PRED8x8;

static av_always_inline
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
        return !mb_y ? DC_127_PRED8x8 : mode;
        return !mb_x ? DC_129_PRED8x8 : mode;
    case PLANE_PRED8x8 /*TM*/:
        return check_tm_pred8x8_mode(mode, mb_x, mb_y);

static av_always_inline
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
        return mb_y ? VERT_VP8_PRED : DC_129_PRED;
        return mb_y ? mode : HOR_VP8_PRED;

static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
    case DIAG_DOWN_LEFT_PRED:
        return !mb_y ? DC_127_PRED : mode;
        return !mb_x ? DC_129_PRED : mode;
        return check_tm_pred4x4_mode(mode, mb_x, mb_y);
    case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
static av_always_inline
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
    int x, y, mode, nnz;

    // for the first row, we need to run xchg_mb_border to init the top edge to 127
    // otherwise, skip it if we aren't going to deblock
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
        s->hpc.pred16x16[mode](dst[0], s->linesize);
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        uint8_t tr_top[4] = { 127, 127, 127, 127 };
        // all blocks on the right edge of the macroblock use the bottom edge of
        // the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
            mb_x == s->mb_width-1) {
            tr = tr_right[-1]*0x01010101u;
            tr_right = (uint8_t *)&tr;

            AV_ZERO128(td->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr+4*x;
                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];

                if ((y == 0 || x == 3) && mb_y == 0) {
                    topright = tr_right;
                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
                    dst = copy_dst + 12;
                        AV_WN32A(copy_dst+4, 127U * 0x01010101U);
                        AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
                            copy_dst[3] = ptr[4*x-s->linesize-1];
                        copy_dst[35] = 129U;
                        copy_dst[11] = ptr[4*x              -1];
                        copy_dst[19] = ptr[4*x+s->linesize  -1];
                        copy_dst[27] = ptr[4*x+s->linesize*2-1];
                        copy_dst[35] = ptr[4*x+s->linesize*3-1];

                s->hpc.pred4x4[mode](dst, topright, linesize);
                    AV_COPY32(ptr+4*x              , copy_dst+12);
                    AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
                    AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
                    AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);

                nnz = td->non_zero_count_cache[y][x];
                        s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
                        s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);

            ptr += 4*s->linesize;

    mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);

static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
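    /* e.g. a subpel phase of 2 needs 2 extra pixels to the left and 3 to the
     * right, i.e. the 5 extra samples of the six-tap filter; phase 0 needs
     * none and selects the plain copy function at index 0 */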
/**
 * @param s VP8 decoding context
 * @param dst target buffer for block data at block position
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
                 ThreadFrame *ref, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, ptrdiff_t linesize,
                 vp8_mc_func mc_func[3][3])
    uint8_t *src = ref->f->data[0];

        int src_linesize = linesize;

        int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            src_linesize = EDGE_EMU_LINESIZE;
        mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
/**
 * chroma MC function
 *
 * @param s VP8 decoding context
 * @param dst1 target buffer for block data at block position (U plane)
 * @param dst2 target buffer for block data at block position (V plane)
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
                   ThreadFrame *ref, const VP56mv *mv, int x_off, int y_off,
                   int block_w, int block_h, int width, int height, ptrdiff_t linesize,
                   vp8_mc_func mc_func[3][3])
    uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];

        int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
        int my = mv->y&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src1 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);

            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src2 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);

        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
static av_always_inline
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                 ThreadFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off,
                 int block_w, int block_h,
                 int width, int height, VP56mv *mv)
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
                ref_frame, mv, x_off + bx_off, y_off + by_off,
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);

    if (s->profile == 3) {
    x_off   >>= 1; y_off   >>= 1;
    bx_off  >>= 1; by_off  >>= 1;
    width   >>= 1; height  >>= 1;
    block_w >>= 1; block_h >>= 1;
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref-1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4;
        int mx = (mb->mv.x>>2) + x_off + 8;
        int my = (mb->mv.y>>2) + y_off;
        uint8_t **src = s->framep[ref]->tf.f->data;
        int off = mx + (my + (mb_x&3)*4)*s->linesize + 64;
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
        s->vdsp.prefetch(src[0]+off, s->linesize, 4);
        off = (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
        s->vdsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Apply motion vectors to prediction buffer, chapter 18.
 */
static av_always_inline
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16*s->mb_width, height = 16*s->mb_height;
    ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
    case VP8_SPLITMVMODE_4x4: {
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
                            4*x + x_off, 4*y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);

        x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
                         mb->bmv[ 2*y    * 4 + 2*x+1].x +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].x +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].x;
                uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
                         mb->bmv[ 2*y    * 4 + 2*x+1].y +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].y +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].y;
                uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
                uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
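                /* average of the four luma MVs covering this 4x4 chroma block;
                 * the sign-bit term keeps the rounding symmetric for negative sums */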
                if (s->profile == 3) {
                vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
                              dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
                              4*x + x_off, 4*y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
    case VP8_SPLITMVMODE_16x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
    case VP8_SPLITMVMODE_8x16:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
    case VP8_SPLITMVMODE_8x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
                                     uint8_t *dst[3], VP8Macroblock *mb)
    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
                if (nnz4&~0x01010101) {
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
                        else if ((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
            y_dst += 4*s->linesize;

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
            uint8_t *ch_dst = dst[1+ch];
            if (nnz4&~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
                        else if ((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
                            goto chroma_idct_end;
                    ch_dst += 4*s->uvlinesize;
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f)
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[mb->segment];
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];
        filter_level += s->lf_delta.mode[mb->mode];

    filter_level = av_clip_uintp2(filter_level, 6);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
        interior_limit   = FFMIN(interior_limit, 9 - s->filter.sharpness);
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit  = interior_limit;
    f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
    int mbedge_lim, bedge_lim, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize     = s->linesize;
    int uvlinesize   = s->uvlinesize;
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

    bedge_lim  = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
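    /* per the table above, keyframes (second row) only move to the higher HEV
     * thresholds at larger filter levels than inter frames do */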
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);

        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);

        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);

        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
    int mbedge_lim, bedge_lim;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize     = s->linesize;

    bedge_lim  = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);

        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
#define MARGIN (16 << 2)
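/* MARGIN is 16 pixels expressed in quarter-pel units: motion vectors are
 * clamped so that predicted blocks stay within roughly one macroblock of the
 * frame edge */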
static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
                                   VP8Frame *prev_frame)
    VP8Context *s = avctx->priv_data;

    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
        int mb_xy = mb_y*s->mb_width;

        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);

        s->mv_min.x = -MARGIN;
        s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
                AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 1);
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
        int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);\
        if (otd->thread_mb_pos < tmp) {\
            pthread_mutex_lock(&otd->lock);\
            td->wait_mb_pos = tmp;\
                if (otd->thread_mb_pos >= tmp)\
                pthread_cond_wait(&otd->cond, &otd->lock);\
            td->wait_mb_pos = INT_MAX;\
            pthread_mutex_unlock(&otd->lock);\

#define update_pos(td, mb_y, mb_x)\
        int pos = (mb_y << 16) | (mb_x & 0xFFFF);\
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
        int is_null = (next_td == NULL) || (prev_td == NULL);\
        int pos_check = (is_null) ? 1 :\
                        (next_td != td && pos >= next_td->wait_mb_pos) ||\
                        (prev_td != td && pos >= prev_td->wait_mb_pos);\
        td->thread_mb_pos = pos;\
        if (sliced_threading && pos_check) {\
            pthread_mutex_lock(&td->lock);\
            pthread_cond_broadcast(&td->cond);\
            pthread_mutex_unlock(&td->lock);\

#define check_thread_pos(td, otd, mb_x_check, mb_y_check)
#define update_pos(td, mb_y, mb_x)
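/* thread progress is tracked as a packed (mb_y << 16) | mb_x position: a slice
 * blocks in check_thread_pos() until its neighbour row has decoded far enough,
 * and update_pos() wakes any waiters once this slice has passed their target */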
static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
    int mb_y = td->thread_mb_pos>>16;
    int mb_x, mb_xy = mb_y*s->mb_width;
    int num_jobs = s->num_jobs;
    VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
    VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
        curframe->tf.f->data[0] + 16*mb_y*s->linesize,
        curframe->tf.f->data[1] +  8*mb_y*s->uvlinesize,
        curframe->tf.f->data[2] +  8*mb_y*s->uvlinesize

    if (mb_y == 0)            prev_td = td;
    else                      prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    if (mb_y == s->mb_height-1) next_td = td;
    else                      next_td = &s->thread_data[(jobnr + 1)%num_jobs];
    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);

        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);

    memset(td->left_nnz, 0, sizeof(td->left_nnz));

    s->mv_min.x = -MARGIN;
    s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        // Wait for previous thread to read mb_x+2, and reach mb_y-1.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
                check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);

        s->vdsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
        s->vdsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);

            decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 0);

            prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y);
            inter_predict(s, td, dst, mb, mb_x, mb_y);

            prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

            idct_mb(s, td, dst, mb);

            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned

            // Reset DC block predictors if they would exist if the mb had coefficients
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8]     = 0;
                s->top_nnz[mb_x][8] = 0;

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);

        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        if (mb_x == s->mb_width+1) {
            update_pos(td, mb_y, s->mb_width+3);
            update_pos(td, mb_y, mb_x);
static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe->tf.f;
    VP8ThreadData *prev_td, *next_td;
        curframe->data[0] + 16*mb_y*s->linesize,
        curframe->data[1] +  8*mb_y*s->uvlinesize,
        curframe->data[2] +  8*mb_y*s->uvlinesize

    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;

    if (mb_y == 0)            prev_td = td;
    else                      prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    if (mb_y == s->mb_height-1) next_td = td;
    else                      next_td = &s->thread_data[(jobnr + 1)%num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];
        if (prev_td != td) {
            check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);

            if (next_td != &s->thread_data[0]) {
                check_thread_pos(td, next_td, mb_x+1, mb_y+1);

        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
            filter_mb(s, dst, f, mb_x, mb_y);

        update_pos(td, mb_y, (s->mb_width+3) + mb_x);
static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[jobnr];
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
    VP8Frame *curframe = s->curframe;
    int mb_y, num_jobs = s->num_jobs;

    td->thread_nr = threadnr;
    for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
        if (mb_y >= s->mb_height) break;
        td->thread_mb_pos = mb_y<<16;
        vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
        if (s->deblock_filter)
            vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
        update_pos(td, mb_y, INT_MAX & 0xFFFF);

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            ff_thread_report_progress(&curframe->tf, mb_y, 0);
int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
    VP8Context *s = avctx->priv_data;
    int ret, i, referenced, num_jobs;
    enum AVDiscard skip_thresh;
    VP8Frame *av_uninit(curframe), *prev_frame;

    if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)

    prev_frame = s->framep[VP56_FRAME_CURRENT];

    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
                 || s->update_altref == VP56_FRAME_CURRENT;

    skip_thresh = !referenced ? AVDISCARD_NONREF :
                  !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
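    /* a frame may only be dropped if doing so cannot corrupt later frames:
     * unreferenced frames at skip_frame >= nonref, inter frames at >= nonkey,
     * keyframes only when everything is being discarded */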
    if (avctx->skip_frame >= skip_thresh) {
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);

    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // release no longer referenced frames
    for (i = 0; i < 5; i++)
        if (s->frames[i].tf.f->data[0] &&
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            vp8_release_frame(s, &s->frames[i]);

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];

        av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");

    if (curframe->tf.f->data[0])
        vp8_release_frame(s, curframe);

    // Given that arithmetic probabilities are updated every frame, it's quite likely
    // that the values we have on a random interframe are complete junk if we didn't
    // start decode on a keyframe. So just don't display anything rather than junk.
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN] ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;

    curframe->tf.f->key_frame = s->keyframe;
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
    if ((ret = vp8_alloc_frame(s, curframe, referenced))) {
        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");

    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];

    if (s->update_golden != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN]  = s->framep[s->update_golden];
        s->next_framep[VP56_FRAME_GOLDEN]  = s->framep[VP56_FRAME_GOLDEN];

    if (s->update_last) {
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];

    s->next_framep[VP56_FRAME_CURRENT] = curframe;

    ff_thread_finish_setup(avctx);

    s->linesize   = curframe->tf.f->linesize[0];
    s->uvlinesize = curframe->tf.f->linesize[1];

    memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
    /* Zero macroblock structures for top/top-left prediction from outside the frame. */
        memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
    if (!s->mb_layout && s->keyframe)
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);

    memset(s->ref_count, 0, sizeof(s->ref_count));

    if (s->mb_layout == 1) {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, 1, 0);
        vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);

    if (avctx->active_thread_type == FF_THREAD_FRAME)
        num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
    s->num_jobs   = num_jobs;
    s->curframe   = curframe;
    s->prev_frame = prev_frame;
    s->mv_min.y   = -MARGIN;
    s->mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].thread_mb_pos = 0;
        s->thread_data[i].wait_mb_pos   = INT_MAX;
    avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);

    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
        if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)

    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
    VP8Context *s = avctx->priv_data;

    vp8_decode_flush_impl(avctx, 1);
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        av_frame_free(&s->frames[i].tf.f);
static av_cold int vp8_init_frames(VP8Context *s)
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
        s->frames[i].tf.f = av_frame_alloc();
        if (!s->frames[i].tf.f)
            return AVERROR(ENOMEM);
av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
    VP8Context *s = avctx->priv_data;

    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
    avctx->internal->allocate_progress = 1;

    ff_videodsp_init(&s->vdsp, 8);
    ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
    ff_vp8dsp_init(&s->vp8dsp);

    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
    VP8Context *s = avctx->priv_data;

    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
#define REBASE(pic) \
    pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
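/* REBASE translates a frame pointer from the source decoder context into the
 * corresponding slot of this context's frames[] array */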
static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;

    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;

    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta     = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
        if (s_src->frames[i].tf.f->data[0]) {
            int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);

    s->framep[0] = REBASE(s_src->next_framep[0]);
    s->framep[1] = REBASE(s_src->next_framep[1]);
    s->framep[2] = REBASE(s_src->next_framep[2]);
    s->framep[3] = REBASE(s_src->next_framep[3]);
AVCodec ff_vp8_decoder = {
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP8,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = ff_vp8_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = ff_vp8_decode_frame,
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
    .flush                 = vp8_decode_flush,
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),