/*
 * VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Jason Garrett-Glaser
 * Copyright (C) 2012 Daniel Kang
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
26 #include "libavutil/imgutils.h"
31 #include "rectangle.h"
static void free_buffers(VP8Context *s)
{
    int i;
    if (s->thread_data)
        for (i = 0; i < MAX_THREADS; i++) {
#if HAVE_THREADS
            pthread_cond_destroy(&s->thread_data[i].cond);
            pthread_mutex_destroy(&s->thread_data[i].lock);
#endif
            av_freep(&s->thread_data[i].filter_strength);
            av_freep(&s->thread_data[i].edge_emu_buffer);
        }
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}
static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    int ret;
    if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                    ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
        return ret;
    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
        ff_thread_release_buffer(s->avctx, &f->tf);
        return AVERROR(ENOMEM);
    }
    return 0;
}
static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
}
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;
    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }

    return 0;
}
static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(s, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem)
        free_buffers(s);
}
static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0);
}
static int update_dimensions(VP8Context *s, int width, int height)
{
    AVCodecContext *avctx = s->avctx;
    int i, ret;

    if (width  != s->avctx->width ||
        height != s->avctx->height) {
        vp8_decode_flush_impl(s->avctx, 1);

        ret = ff_set_dimensions(s->avctx, width, height);
        if (ret < 0)
            return ret;
    }

    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
    s->mb_height = (s->avctx->coded_height + 15) / 16;

    s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) &&
                   (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
                                               sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
    } else // Sliced threading
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
                                         sizeof(*s->macroblocks));
    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength =
            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
#if HAVE_THREADS
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
    }

    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        (!s->intra4x4_pred_mode_top && !s->mb_layout))
        return AVERROR(ENOMEM);

    s->macroblocks = s->macroblocks_base + 1;

    return 0;
}
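/* Note on the two macroblock layouts allocated above: with frame threading
 * (or a single thread) only one row of macroblocks plus a spill column is
 * kept (mb_width + mb_height * 2 + 1 entries), since rows are decoded in
 * order and neighbours can be reused in place; with sliced threading a full
 * (mb_width + 2) x (mb_height + 2) array appears to be required so that
 * top/top-left neighbours stay valid while several rows decode at once. */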
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map = vp8_rac_get(c);

    if (vp8_rac_get(c)) { // update segment feature data
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    buf      += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
        int size = AV_RL24(sizes + 3 * i);
        if (buf_size - size < 0)
            return -1;

        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        buf      += size;
        buf_size -= size;
    }
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}
static void get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;
        } else
            base_qi = yac_qi;

        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta,  7)];
        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,              7)];
        s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[i].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];

        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}
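/* Worked example of the fixed-point scaling above: for an AC quantizer of
 * 60, (101581 * 60) >> 16 == 93, i.e. 60 * 155/100. The FFMAX/FFMIN clamps
 * then enforce a minimum of 8 for the Y2 AC factor and a maximum of 132 for
 * the chroma DC factor. */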
/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
 *
 * Intra frames update all 3 references
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
 * If the update (golden|altref) flag is set, it's updated with the current frame
 *     if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 *     0: no update
 *     1: VP56_FRAME_PREVIOUS
 *     2: update golden with altref, or update altref with golden
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;

    if (update)
        return VP56_FRAME_CURRENT;

    switch (vp8_rac_get_uint(c, 2)) {
    case 1:
        return VP56_FRAME_PREVIOUS;
    case 2:
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    }
    return VP56_FRAME_NONE;
}
static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}
static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, i, j, k, l, m, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7*s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf+3) & 0x3fff;
        height = AV_RL16(buf+5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        for (i = 0; i < 4; i++)
            for (j = 0; j < 16; j++)
                memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                       sizeof(s->prob->token[i][j]));
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height) {
        if ((ret = update_dimensions(s, width, height)) < 0)
            return ret;
    }

    get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);

        if (vp8_rac_get(c))
            for (i = 0; i < 4; i++)
                s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
        if (vp8_rac_get(c))
            for (i = 0; i < 3; i++)
                s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);

        // 17.2 MV probability update
        for (i = 0; i < 2; i++)
            for (j = 0; j < 19; j++)
                if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                    s->prob->mvc[i][j] = vp8_rac_get_nn(c);
    }

    return 0;
}
static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
{
    dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
    dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
}
/**
 * Motion vector coding, 17.1.
 */
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int i;

        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = 9; i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p+2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3*bit;
        x  += 4*bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2*bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}
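/* Long ("not short") motion vectors above are coded as raw
 * probability-coded bits: low bits 0-2 first, then bits 9 down to 4.
 * Bit 3 is implied set when bits 4-9 are all zero (the !(x & 0xFFF0) test),
 * since values 0-7 would have used the short tree; otherwise it is read
 * explicitly with p[12]. */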
static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
{
    if (left == top)
        return vp8_submv_prob[4-!!left];
    if (!top)
        return vp8_submv_prob[2];
    return vp8_submv_prob[1-!!left];
}
/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
{
    int part_idx;
    int n, num;
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
                  *mbsplits_top,
                  *mbsplits_cur, *firstidx;
    VP56mv *top_mv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width-1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv       = top_mb->bmv;

    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        } else {
            part_idx = VP8_SPLITMVMODE_8x8;
        }
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num          = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx     = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
                    mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}
static av_always_inline
void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
                                  mb - 1 /* left */,
                                  0      /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    } else {
        mb_edge[0] = mb - s->mb_width-1;
        mb_edge[2] = mb - s->mb_width-2;
    }

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
#define MV_EDGE_CHECK(n)\
    {\
        VP8Macroblock *edge = mb_edge[n];\
        int edge_ref = edge->ref_frame;\
        if (edge_ref != VP56_FRAME_CURRENT) {\
            uint32_t mv = AV_RN32A(&edge->mv);\
            if (mv) {\
                if (cur_sign_bias != sign_bias[edge_ref]) {\
                    /* SWAR negate of the values in mv. */\
                    mv = ~mv;\
                    mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
                }\
                if (!n || mv != AV_RN32A(&near_mv[idx]))\
                    AV_WN32A(&near_mv[++idx], mv);\
                cnt[idx] += 1 + (n != 2);\
            } else\
                cnt[CNT_ZERO] += 1 + (n != 2);\
        }\
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {

                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);

                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
                } else {
                    mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
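/* The SWAR negate in MV_EDGE_CHECK above flips both packed int16 MV
 * components at once: ~mv is a bitwise NOT, and adding 0x00010001 while
 * masking the sign bits completes the two's complement of each 16-bit half
 * without letting a carry cross from the low lane into the high one, so
 * e.g. a packed {x=1, y=-2} becomes {x=-1, y=2} in one 32-bit operation. */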
static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
{
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

    if (layout == 1) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
    if (keyframe) {
        int x, y;
        uint8_t *top;
        uint8_t *const left = s->intra4x4_pred_mode_left;
        if (layout == 1)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y]   = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
    }
}
static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout)
{
    VP56RangeCoder *c = &s->c;

    if (s->segmentation.update_map)
        *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
    else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        } else {
            const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
            if (s->mb_layout == 1)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A( s->intra4x4_pred_mode_left, modes);
        }

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
        mb->ref_frame = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
                VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame-1]++;

        // motion vectors, 16.3
        decode_mvs(s, mb, mb_x, mb_y, layout);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
        mb->ref_frame        = VP56_FRAME_CURRENT;
        mb->partitioning     = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}
#ifndef decode_block_coeffs_internal
/**
 * @param r arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
                                        int i, uint8_t *token_prob, int16_t qmul[2])
{
    VP56RangeCoder c = *r;
    goto skip_eob;
    do {
        int coeff;
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i+1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else {                                             // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else { // DCT_CAT3 and up
                    int a = vp56_rac_get_prob(&c, token_prob[8]);
                    int b = vp56_rac_get_prob(&c, token_prob[9+a]);
                    int cat = (a<<1) + b;
                    coeff  = 3 + (8<<cat);
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i+1][2];
        }
        block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}
#endif
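/* Note that decode_block_coeffs_internal above works on a local copy of the
 * range coder (VP56RangeCoder c = *r) and writes it back on exit, presumably
 * so the compiler can keep the coder state in registers across the hot token
 * loop. The #ifndef guard lets an arch-specific implementation (e.g. asm)
 * replace this function entirely. */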
/**
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
                        int i, int zero_nhood, int16_t qmul[2])
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
    return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
}
static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
                      uint8_t t_nnz[9], uint8_t l_nnz[9])
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;
    int block_dc = 0;

    if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
                                  s->qmat[segment].luma_dc_qmul);
        l_nnz[8] = t_nnz[8] = !!nnz;
        if (nnz) {
            nnz_total += nnz;
            block_dc = 1;
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
        }
        luma_start = 1;
        luma_ctx = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
                                      nnz_pred, s->qmat[segment].luma_qmul);
            // nnz+block_dc may be one more than the actual last index, but we don't care
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
                nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
                                          nnz_pred, s->qmat[segment].chroma_qmul);
                td->non_zero_count_cache[i][(y<<1)+x] = nnz;
                t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}
static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                      int linesize, int uvlinesize, int simple)
{
    AV_COPY128(top_border, src_y + 15*linesize);
    if (!simple) {
        AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
        AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
    }
}
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                    int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
                    int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border-32;     // for TL prediction
    src_y  -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a,b,xchg) do {                     \
        if (xchg) AV_SWAP64(b,a);               \
        else      AV_COPY64(b,a);               \
    } while (0)

    XCHG(top_border_m1+8, src_y-8, xchg);
    XCHG(top_border,      src_y,   xchg);
    XCHG(top_border+8,    src_y+8, 1);
    if (mb_x < mb_width-1)
        XCHG(top_border+32, src_y+16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1+16, src_cb-8, xchg);
        XCHG(top_border_m1+24, src_cr-8, xchg);
        XCHG(top_border+16, src_cb, 1);
        XCHG(top_border+24, src_cr, 1);
    }
}
static av_always_inline
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x)
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
    else
        return mb_y ? mode : LEFT_DC_PRED8x8;
}

static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x)
        return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
    else
        return mb_y ? mode : HOR_PRED8x8;
}

static av_always_inline
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
{
    switch (mode) {
    case DC_PRED8x8:
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    case VERT_PRED8x8:
        return !mb_y ? DC_127_PRED8x8 : mode;
    case HOR_PRED8x8:
        return !mb_x ? DC_129_PRED8x8 : mode;
    case PLANE_PRED8x8 /*TM*/:
        return check_tm_pred8x8_mode(mode, mb_x, mb_y);
    }
    return mode;
}

static av_always_inline
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x)
        return mb_y ? VERT_VP8_PRED : DC_129_PRED;
    else
        return mb_y ? mode : HOR_VP8_PRED;
}

static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
{
    switch (mode) {
    case VERT_PRED:
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? DC_127_PRED : mode;
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? DC_129_PRED : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y);
    case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
    }
    return mode;
}
static av_always_inline
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    int x, y, mode, nnz;
    uint32_t tr;

    // for the first row, we need to run xchg_mb_border to init the top edge to 127
    // otherwise, skip it if we aren't going to deblock
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        uint8_t tr_top[4] = { 127, 127, 127, 127 };

        // all blocks on the right edge of the macroblock use bottom edge
        // the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (mb_y &&
            mb_x == s->mb_width-1) {
            tr = tr_right[-1]*0x01010101u;
            tr_right = (uint8_t *)&tr;
        }

        if (mb->skip)
            AV_ZERO128(td->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr+4*x;
                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];

                if ((y == 0 || x == 3) && mb_y == 0) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
                if (copy) {
                    dst = copy_dst + 12;
                    linesize = 8;
                    if (!(mb_y + y)) {
                        copy_dst[3] = 127U;
                        AV_WN32A(copy_dst+4, 127U * 0x01010101U);
                    } else {
                        AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
                        if (!(mb_x + x)) {
                            copy_dst[3] = 129U;
                        } else {
                            copy_dst[3] = ptr[4*x-s->linesize-1];
                        }
                    }
                    if (!(mb_x + x)) {
                        copy_dst[11] =
                        copy_dst[19] =
                        copy_dst[27] =
                        copy_dst[35] = 129U;
                    } else {
                        copy_dst[11] = ptr[4*x              -1];
                        copy_dst[19] = ptr[4*x+s->linesize  -1];
                        copy_dst[27] = ptr[4*x+s->linesize*2-1];
                        copy_dst[35] = ptr[4*x+s->linesize*3-1];
                    }
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    AV_COPY32(ptr+4*x              , copy_dst+12);
                    AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
                    AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
                    AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
                }

                nnz = td->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr      += 4*s->linesize;
            intra4x4 += 4;
        }
    }

    mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
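/* In the table above, even fractional positions (2, 4, 6) select the 6-tap
 * filter and need 2 extra pixels to the left/above plus 3 to the
 * right/below, while odd positions map to the 4-tap variant (1 and 2 extra).
 * Luma MVs are doubled and masked below so only even positions occur there;
 * chroma can hit all eight. */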
/**
 * luma MC function
 *
 * @param s VP8 decoding context
 * @param dst target buffer for block data at block position
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
                 ThreadFrame *ref, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, ptrdiff_t linesize,
                 vp8_mc_func mc_func[3][3])
{
    uint8_t *src = ref->f->data[0];

    if (AV_RN32A(mv)) {
        int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        // edge emulation
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src - my_idx * linesize - mx_idx,
                                     linesize,
                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}
/**
 * chroma MC function
 *
 * @param s VP8 decoding context
 * @param dst1 target buffer for block data at block position (U plane)
 * @param dst2 target buffer for block data at block position (V plane)
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
                   ThreadFrame *ref, const VP56mv *mv, int x_off, int y_off,
                   int block_w, int block_h, int width, int height, ptrdiff_t linesize,
                   vp8_mc_func mc_func[3][3])
{
    uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];

    if (AV_RN32A(mv)) {
        int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
        int my = mv->y&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src1 - my_idx * linesize - mx_idx,
                                     linesize,
                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);

            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src2 - my_idx * linesize - mx_idx,
                                     linesize,
                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}
static av_always_inline
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                 ThreadFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off,
                 int block_w, int block_h,
                 int width, int height, VP56mv *mv)
{
    VP56mv uvmv = *mv;

    /* Y */
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
                ref_frame, mv, x_off + bx_off, y_off + by_off,
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);

    /* U/V */
    if (s->profile == 3) {
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    x_off   >>= 1; y_off   >>= 1;
    bx_off  >>= 1; by_off  >>= 1;
    width   >>= 1; height  >>= 1;
    block_w >>= 1; block_h >>= 1;
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
}
/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref-1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4;
        int mx = (mb->mv.x>>2) + x_off + 8;
        int my = (mb->mv.y>>2) + y_off;
        uint8_t **src= s->framep[ref]->tf.f->data;
        int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
        s->vdsp.prefetch(src[0]+off, s->linesize, 4);
        off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
        s->vdsp.prefetch(src[1]+off, src[2]-src[1], 2);
    }
}
/**
 * Apply motion vectors to prediction buffer, chapter 18.
 */
static av_always_inline
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16*s->mb_width, height = 16*s->mb_height;
    ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
        break;
    case VP8_SPLITMVMODE_4x4: {
        int x, y;
        VP56mv uvmv;

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
                            ref, &bmv[4*y + x],
                            4*x + x_off, 4*y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
            }
        }

        /* U/V */
        x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
                         mb->bmv[ 2*y    * 4 + 2*x+1].x +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].x +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].x;
                uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
                         mb->bmv[ 2*y    * 4 + 2*x+1].y +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].y +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].y;
                uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
                uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
                if (s->profile == 3) {
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
                vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
                              dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
                              4*x + x_off, 4*y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
            }
        }
        break;
    }
    case VP8_SPLITMVMODE_16x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x16:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
        break;
    }
}
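/* In the 4x4 split case above, each chroma MV is the sum of the four luma
 * MVs it covers; (v + 2 + (v >> (INT_BIT-1))) >> 2 then divides that sum by
 * 4 rounding to nearest with ties away from zero, since v >> (INT_BIT-1) is
 * -1 for negative v and 0 otherwise (e.g. -2 -> -1, +2 -> +1). */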
static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
                                     uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
            if (nnz4) {
                if (nnz4&~0x01010101) {
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;
                    }
                } else {
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
                }
            }
            y_dst += 4*s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1+ch];
            if (nnz4&~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            goto chroma_idct_end;
                    }
                    ch_dst += 4*s->uvlinesize;
                }
            } else {
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
            }
        }
chroma_idct_end:
        ;
    }
}
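/* nnz4 above packs four per-block coefficient counts of one row into a
 * single little-endian word; nnz4 & ~0x01010101 is non-zero only when some
 * block has coefficients beyond the DC one, in which case the blocks are
 * transformed individually instead of via the DC-only add4 fast path. */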
static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f)
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[mb->segment];
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];
        filter_level += s->lf_delta.mode[mb->mode];
    }

    filter_level = av_clip_uintp2(filter_level, 6);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit = interior_limit;
    f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
}
static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;
    int uvlinesize = s->uvlinesize;
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };

    if (!filter_level)
        return;

    bedge_lim  = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }

    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }
}
static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;

    if (!filter_level)
        return;

    bedge_lim  = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
    }
    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
    }
}
#define MARGIN (16 << 2)
static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
                                   VP8Frame *prev_frame)
{
    VP8Context *s = avctx->priv_data;
    int mb_x, mb_y;

    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
        int mb_xy = mb_y*s->mb_width;

        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);

        s->mv_min.x = -MARGIN;
        s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            if (mb_y == 0)
                AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 1);
            s->mv_min.x -= 64;
            s->mv_max.x -= 64;
        }
        s->mv_min.y -= 64;
        s->mv_max.y -= 64;
    }
}
#if HAVE_THREADS
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
    do {\
        int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);\
        if (otd->thread_mb_pos < tmp) {\
            pthread_mutex_lock(&otd->lock);\
            td->wait_mb_pos = tmp;\
            do {\
                if (otd->thread_mb_pos >= tmp)\
                    break;\
                pthread_cond_wait(&otd->cond, &otd->lock);\
            } while (1);\
            td->wait_mb_pos = INT_MAX;\
            pthread_mutex_unlock(&otd->lock);\
        }\
    } while(0);

#define update_pos(td, mb_y, mb_x)\
    do {\
    int pos              = (mb_y << 16) | (mb_x & 0xFFFF);\
    int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
    int is_null          = (next_td == NULL) || (prev_td == NULL);\
    int pos_check        = (is_null) ? 1 :\
                            (next_td != td && pos >= next_td->wait_mb_pos) ||\
                            (prev_td != td && pos >= prev_td->wait_mb_pos);\
    td->thread_mb_pos = pos;\
    if (sliced_threading && pos_check) {\
        pthread_mutex_lock(&td->lock);\
        pthread_cond_broadcast(&td->cond);\
        pthread_mutex_unlock(&td->lock);\
    }\
    } while(0);
#else
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)
#define update_pos(td, mb_y, mb_x)
#endif
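/* The sliced-threading handshake above packs a macroblock position into one
 * int as (mb_y << 16) | mb_x, so "has the other thread reached (x, y) yet?"
 * reduces to a single integer comparison in check_thread_pos()/update_pos(). */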
static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
    int mb_y = td->thread_mb_pos>>16;
    int mb_x, mb_xy = mb_y*s->mb_width;
    int num_jobs = s->num_jobs;
    VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
    VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->tf.f->data[0] + 16*mb_y*s->linesize,
        curframe->tf.f->data[1] +  8*mb_y*s->uvlinesize,
        curframe->tf.f->data[2] +  8*mb_y*s->uvlinesize
    };
    if (mb_y == 0) prev_td = td;
    else           prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    if (mb_y == s->mb_height-1) next_td = td;
    else                        next_td = &s->thread_data[(jobnr + 1)%num_jobs];
    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
    else {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
        memset(mb - 1, 0, sizeof(*mb));   // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
    }

    memset(td->left_nnz, 0, sizeof(td->left_nnz));

    s->mv_min.x = -MARGIN;
    s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        // Wait for previous thread to read mb_x+2, and reach mb_y-1.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
            } else {
                check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
            }
        }

        s->vdsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
        s->vdsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);

        if (!s->mb_layout)
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 0);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (!mb->skip) {
            idct_mb(s, td, dst, mb);
        } else {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned

            // Reset DC block predictors if they would exist if the mb had coefficients
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8]     = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);

        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        s->mv_min.x -= 64;
        s->mv_max.x -= 64;

        if (mb_x == s->mb_width+1) {
            update_pos(td, mb_y, s->mb_width+3);
        } else {
            update_pos(td, mb_y, mb_x);
        }
    }
}
static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe->tf.f;
    VP8Macroblock *mb;
    VP8ThreadData *prev_td, *next_td;
    uint8_t *dst[3] = {
        curframe->data[0] + 16*mb_y*s->linesize,
        curframe->data[1] +  8*mb_y*s->uvlinesize,
        curframe->data[2] +  8*mb_y*s->uvlinesize
    };

    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
    else
        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;

    if (mb_y == 0) prev_td = td;
    else           prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    if (mb_y == s->mb_height-1) next_td = td;
    else                        next_td = &s->thread_data[(jobnr + 1)%num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];
        if (prev_td != td) {
            check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
        }
        if (next_td != td)
            if (next_td != &s->thread_data[0]) {
                check_thread_pos(td, next_td, mb_x+1, mb_y+1);
            }

        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        else
            filter_mb(s, dst, f, mb_x, mb_y);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width+3) + mb_x);
    }
}
static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[jobnr];
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
    VP8Frame *curframe = s->curframe;
    int mb_y, num_jobs = s->num_jobs;
    td->thread_nr = threadnr;
    for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
        if (mb_y >= s->mb_height) break;
        td->thread_mb_pos = mb_y<<16;
        vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
        if (s->deblock_filter)
            vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
        update_pos(td, mb_y, INT_MAX & 0xFFFF);

        s->mv_min.y -= 64;
        s->mv_max.y -= 64;

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            ff_thread_report_progress(&curframe->tf, mb_y, 0);
    }

    return 0;
}
int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                        AVPacket *avpkt)
{
    VP8Context *s = avctx->priv_data;
    int ret, i, referenced, num_jobs;
    enum AVDiscard skip_thresh;
    VP8Frame *av_uninit(curframe), *prev_frame;

    if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
        goto err;

    prev_frame = s->framep[VP56_FRAME_CURRENT];

    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
                                || s->update_altref == VP56_FRAME_CURRENT;

    skip_thresh = !referenced ? AVDISCARD_NONREF :
                    !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
        goto skip_decode;
    }
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // release no longer referenced frames
    for (i = 0; i < 5; i++)
        if (s->frames[i].tf.f->data[0] &&
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            vp8_release_frame(s, &s->frames[i]);

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
            break;
        }
    if (i == 5) {
        av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (curframe->tf.f->data[0])
        vp8_release_frame(s, curframe);

    // Given that arithmetic probabilities are updated every frame, it's quite likely
    // that the values we have on a random interframe are complete junk if we didn't
    // start decode on a keyframe. So just don't display anything rather than junk.
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN] ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
        goto err;
    }

    curframe->tf.f->key_frame = s->keyframe;
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
    if ((ret = vp8_alloc_frame(s, curframe, referenced))) {
        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
        goto err;
    }

    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
    }
    if (s->update_golden != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
    }
    if (s->update_last) {
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    } else {
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
    }
    s->next_framep[VP56_FRAME_CURRENT] = curframe;

    ff_thread_finish_setup(avctx);

    s->linesize   = curframe->tf.f->linesize[0];
    s->uvlinesize = curframe->tf.f->linesize[1];

    if (!s->thread_data[0].edge_emu_buffer)
        for (i = 0; i < MAX_THREADS; i++)
            s->thread_data[i].edge_emu_buffer = av_malloc(21*s->linesize);

    memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
    /* Zero macroblock structures for top/top-left prediction from outside the frame. */
    if (!s->mb_layout)
        memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
    if (!s->mb_layout && s->keyframe)
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);

    memset(s->ref_count, 0, sizeof(s->ref_count));

    if (s->mb_layout == 1) {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, 1, 0);
        vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
    }

    if (avctx->active_thread_type == FF_THREAD_FRAME)
        num_jobs = 1;
    else
        num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
    s->num_jobs   = num_jobs;
    s->curframe   = curframe;
    s->prev_frame = prev_frame;
    s->mv_min.y   = -MARGIN;
    s->mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].thread_mb_pos = 0;
        s->thread_data[i].wait_mb_pos = INT_MAX;
    }
    avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);

    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
        if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
            return ret;
        *got_frame = 1;
    }

    return avpkt->size;
err:
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
    return ret;
}
av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int i;

    vp8_decode_flush_impl(avctx, 1);
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        av_frame_free(&s->frames[i].tf.f);

    return 0;
}
static av_cold int vp8_init_frames(VP8Context *s)
{
    int i;
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
        s->frames[i].tf.f = av_frame_alloc();
        if (!s->frames[i].tf.f)
            return AVERROR(ENOMEM);
    }
    return 0;
}
av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;
    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
    avctx->internal->allocate_progress = 1;

    ff_videodsp_init(&s->vdsp, 8);
    ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
    ff_vp8dsp_init(&s->vp8dsp);

    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
        return ret;
    }

    return 0;
}
static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;

    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
        return ret;
    }

    return 0;
}
#define REBASE(pic) \
    pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
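/* REBASE translates a VP8Frame pointer from the source thread context into
 * the frame at the same index in this context's frames[] array, since each
 * frame-threading context owns its own copy of that array. */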
static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
    int i;

    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        free_buffers(s);
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta     = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
        if (s_src->frames[i].tf.f->data[0]) {
            int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
            if (ret < 0)
                return ret;
        }
    }

    s->framep[0] = REBASE(s_src->next_framep[0]);
    s->framep[1] = REBASE(s_src->next_framep[1]);
    s->framep[2] = REBASE(s_src->next_framep[2]);
    s->framep[3] = REBASE(s_src->next_framep[3]);

    return 0;
}
AVCodec ff_vp8_decoder = {
    .name                  = "vp8",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP8,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = ff_vp8_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = ff_vp8_decode_frame,
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
    .flush                 = vp8_decode_flush,
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
};