2 * VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Jason Garrett-Glaser
7 * Copyright (C) 2012 Daniel Kang
9 * This file is part of Libav.
11 * Libav is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * Libav is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with Libav; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/imgutils.h"
31 #include "rectangle.h"
38 static void free_buffers(VP8Context *s)
42 for (i = 0; i < MAX_THREADS; i++) {
44 pthread_cond_destroy(&s->thread_data[i].cond);
45 pthread_mutex_destroy(&s->thread_data[i].lock);
47 av_freep(&s->thread_data[i].filter_strength);
48 av_freep(&s->thread_data[i].edge_emu_buffer);
50 av_freep(&s->thread_data);
51 av_freep(&s->macroblocks_base);
52 av_freep(&s->intra4x4_pred_mode_top);
53 av_freep(&s->top_nnz);
54 av_freep(&s->top_border);
56 s->macroblocks = NULL;
59 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
62 if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
63 ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
65 if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
66 ff_thread_release_buffer(s->avctx, &f->tf);
67 return AVERROR(ENOMEM);
72 static void vp8_release_frame(VP8Context *s, VP8Frame *f)
74 av_buffer_unref(&f->seg_map);
75 ff_thread_release_buffer(s->avctx, &f->tf);
78 static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
82 vp8_release_frame(s, dst);
84 if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
87 !(dst->seg_map = av_buffer_ref(src->seg_map))) {
88 vp8_release_frame(s, dst);
89 return AVERROR(ENOMEM);
96 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
98 VP8Context *s = avctx->priv_data;
101 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
102 vp8_release_frame(s, &s->frames[i]);
103 memset(s->framep, 0, sizeof(s->framep));
109 static void vp8_decode_flush(AVCodecContext *avctx)
111 vp8_decode_flush_impl(avctx, 0);
114 static int update_dimensions(VP8Context *s, int width, int height)
116 AVCodecContext *avctx = s->avctx;
119 if (width != s->avctx->width ||
120 height != s->avctx->height) {
121 if (av_image_check_size(width, height, 0, s->avctx))
122 return AVERROR_INVALIDDATA;
124 vp8_decode_flush_impl(s->avctx, 1);
126 avcodec_set_dimensions(s->avctx, width, height);
129 s->mb_width = (s->avctx->coded_width +15) / 16;
130 s->mb_height = (s->avctx->coded_height+15) / 16;
132 s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
133 if (!s->mb_layout) { // Frame threading and one thread
134 s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
135 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
137 else // Sliced threading
138 s->macroblocks_base = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
139 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
140 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
141 s->thread_data = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));
143 for (i = 0; i < MAX_THREADS; i++) {
144 s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
146 pthread_mutex_init(&s->thread_data[i].lock, NULL);
147 pthread_cond_init(&s->thread_data[i].cond, NULL);
151 if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
152 (!s->intra4x4_pred_mode_top && !s->mb_layout))
153 return AVERROR(ENOMEM);
155 s->macroblocks = s->macroblocks_base + 1;
160 static void parse_segment_info(VP8Context *s)
162 VP56RangeCoder *c = &s->c;
165 s->segmentation.update_map = vp8_rac_get(c);
167 if (vp8_rac_get(c)) { // update segment feature data
168 s->segmentation.absolute_vals = vp8_rac_get(c);
170 for (i = 0; i < 4; i++)
171 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
173 for (i = 0; i < 4; i++)
174 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
176 if (s->segmentation.update_map)
177 for (i = 0; i < 3; i++)
178 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
181 static void update_lf_deltas(VP8Context *s)
183 VP56RangeCoder *c = &s->c;
186 for (i = 0; i < 4; i++) {
187 if (vp8_rac_get(c)) {
188 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
191 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
195 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
196 if (vp8_rac_get(c)) {
197 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
200 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
205 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
207 const uint8_t *sizes = buf;
210 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
212 buf += 3*(s->num_coeff_partitions-1);
213 buf_size -= 3*(s->num_coeff_partitions-1);
217 for (i = 0; i < s->num_coeff_partitions-1; i++) {
218 int size = AV_RL24(sizes + 3*i);
219 if (buf_size - size < 0)
222 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
226 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
231 static void get_quants(VP8Context *s)
233 VP56RangeCoder *c = &s->c;
236 int yac_qi = vp8_rac_get_uint(c, 7);
237 int ydc_delta = vp8_rac_get_sint(c, 4);
238 int y2dc_delta = vp8_rac_get_sint(c, 4);
239 int y2ac_delta = vp8_rac_get_sint(c, 4);
240 int uvdc_delta = vp8_rac_get_sint(c, 4);
241 int uvac_delta = vp8_rac_get_sint(c, 4);
243 for (i = 0; i < 4; i++) {
244 if (s->segmentation.enabled) {
245 base_qi = s->segmentation.base_quant[i];
246 if (!s->segmentation.absolute_vals)
251 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
252 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)];
253 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
254 /* 101581>>16 is equivalent to 155/100 */
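/* e.g. with an AC lookup value of 100: (101581 * 100) >> 16 = 155, i.e. the
   second-order luma DC quantizer is the AC quantizer scaled by 1.55 */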
255 s->qmat[i].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
256 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
257 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
259 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
260 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
265 * Determine which buffers golden and altref should be updated with after this frame.
266 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
268 * Intra frames update all 3 references
269 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
270 * If the update (golden|altref) flag is set, it's updated with the current frame
271 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
272 * If the flag is not set, the number read means:
273 * 0: no update
274 * 1: VP56_FRAME_PREVIOUS
275 * 2: update golden with altref, or update altref with golden
277 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
279 VP56RangeCoder *c = &s->c;
282 return VP56_FRAME_CURRENT;
284 switch (vp8_rac_get_uint(c, 2)) {
286 return VP56_FRAME_PREVIOUS;
288 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
290 return VP56_FRAME_NONE;
293 static void update_refs(VP8Context *s)
295 VP56RangeCoder *c = &s->c;
297 int update_golden = vp8_rac_get(c);
298 int update_altref = vp8_rac_get(c);
300 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
301 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
304 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
306 VP56RangeCoder *c = &s->c;
307 int header_size, hscale, vscale, i, j, k, l, m, ret;
308 int width = s->avctx->width;
309 int height = s->avctx->height;
311 s->keyframe = !(buf[0] & 1);
312 s->profile = (buf[0]>>1) & 7;
313 s->invisible = !(buf[0] & 0x10);
314 header_size = AV_RL24(buf) >> 5;
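/* frame tag, 24 bits little-endian: bit 0 = inter-frame flag, bits 1-3 = profile,
   bit 4 = show_frame, bits 5-23 = size of the first partition (header_size) */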
319 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
322 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
323 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
324 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
326 if (header_size > buf_size - 7*s->keyframe) {
327 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
328 return AVERROR_INVALIDDATA;
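/* key frames carry a 3-byte sync code, 0x9d 0x01 0x2a, which reads as 0x2a019d
   when fetched as a little-endian 24-bit value */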
332 if (AV_RL24(buf) != 0x2a019d) {
333 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
334 return AVERROR_INVALIDDATA;
336 width = AV_RL16(buf+3) & 0x3fff;
337 height = AV_RL16(buf+5) & 0x3fff;
338 hscale = buf[4] >> 6;
339 vscale = buf[6] >> 6;
343 if (hscale || vscale)
344 avpriv_request_sample(s->avctx, "Upscaling");
346 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
347 for (i = 0; i < 4; i++)
348 for (j = 0; j < 16; j++)
349 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
350 sizeof(s->prob->token[i][j]));
351 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
352 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
353 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
354 memset(&s->segmentation, 0, sizeof(s->segmentation));
355 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
358 ff_vp56_init_range_decoder(c, buf, header_size);
360 buf_size -= header_size;
364 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
365 vp8_rac_get(c); // whether we can skip clamping in dsp functions
368 if ((s->segmentation.enabled = vp8_rac_get(c)))
369 parse_segment_info(s);
371 s->segmentation.update_map = 0; // FIXME: move this to some init function?
373 s->filter.simple = vp8_rac_get(c);
374 s->filter.level = vp8_rac_get_uint(c, 6);
375 s->filter.sharpness = vp8_rac_get_uint(c, 3);
377 if ((s->lf_delta.enabled = vp8_rac_get(c)))
381 if (setup_partitions(s, buf, buf_size)) {
382 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
383 return AVERROR_INVALIDDATA;
386 if (!s->macroblocks_base || /* first frame */
387 width != s->avctx->width || height != s->avctx->height) {
388 if ((ret = update_dimensions(s, width, height)) < 0)
396 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
397 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
400 // if we aren't saving this frame's probabilities for future frames,
401 // make a copy of the current probabilities
402 if (!(s->update_probabilities = vp8_rac_get(c)))
403 s->prob[1] = s->prob[0];
405 s->update_last = s->keyframe || vp8_rac_get(c);
407 for (i = 0; i < 4; i++)
408 for (j = 0; j < 8; j++)
409 for (k = 0; k < 3; k++)
410 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
411 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
412 int prob = vp8_rac_get_uint(c, 8);
413 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
414 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
417 if ((s->mbskip_enabled = vp8_rac_get(c)))
418 s->prob->mbskip = vp8_rac_get_uint(c, 8);
421 s->prob->intra = vp8_rac_get_uint(c, 8);
422 s->prob->last = vp8_rac_get_uint(c, 8);
423 s->prob->golden = vp8_rac_get_uint(c, 8);
426 for (i = 0; i < 4; i++)
427 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
429 for (i = 0; i < 3; i++)
430 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
432 // 17.2 MV probability update
433 for (i = 0; i < 2; i++)
434 for (j = 0; j < 19; j++)
435 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
436 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
442 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
444 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
445 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
449 * Motion vector coding, 17.1.
451 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
455 if (vp56_rac_get_prob_branchy(c, p[0])) {
458 for (i = 0; i < 3; i++)
459 x += vp56_rac_get_prob(c, p[9 + i]) << i;
460 for (i = 9; i > 3; i--)
461 x += vp56_rac_get_prob(c, p[9 + i]) << i;
462 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
466 const uint8_t *ps = p+2;
467 bit = vp56_rac_get_prob(c, *ps);
470 bit = vp56_rac_get_prob(c, *ps);
473 x += vp56_rac_get_prob(c, *ps);
476 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
479 static av_always_inline
480 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
483 return vp8_submv_prob[4-!!left];
485 return vp8_submv_prob[2];
486 return vp8_submv_prob[1-!!left];
490 * Split motion vector prediction, 16.4.
491 * @returns the number of motion vectors parsed (2, 4 or 16)
493 static av_always_inline
494 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
498 VP8Macroblock *top_mb;
499 VP8Macroblock *left_mb = &mb[-1];
500 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
502 *mbsplits_cur, *firstidx;
504 VP56mv *left_mv = left_mb->bmv;
505 VP56mv *cur_mv = mb->bmv;
507 if (!layout) // layout is inlined, s->mb_layout is not
510 top_mb = &mb[-s->mb_width-1];
511 mbsplits_top = vp8_mbsplits[top_mb->partitioning];
512 top_mv = top_mb->bmv;
514 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
515 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
516 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
518 part_idx = VP8_SPLITMVMODE_8x8;
521 part_idx = VP8_SPLITMVMODE_4x4;
524 num = vp8_mbsplit_count[part_idx];
525 mbsplits_cur = vp8_mbsplits[part_idx],
526 firstidx = vp8_mbfirstidx[part_idx];
527 mb->partitioning = part_idx;
529 for (n = 0; n < num; n++) {
531 uint32_t left, above;
532 const uint8_t *submv_prob;
535 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
537 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
539 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
541 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
543 submv_prob = get_submv_prob(left, above);
545 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
546 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
547 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
548 mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
549 mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
551 AV_ZERO32(&mb->bmv[n]);
554 AV_WN32A(&mb->bmv[n], above);
557 AV_WN32A(&mb->bmv[n], left);
564 static av_always_inline
565 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
567 VP8Macroblock *mb_edge[3] = { 0 /* top */,
570 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
571 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
573 int cur_sign_bias = s->sign_bias[mb->ref_frame];
574 int8_t *sign_bias = s->sign_bias;
576 uint8_t cnt[4] = { 0 };
577 VP56RangeCoder *c = &s->c;
579 if (!layout) { // layout is inlined (s->mb_layout is not)
584 mb_edge[0] = mb - s->mb_width-1;
585 mb_edge[2] = mb - s->mb_width-2;
588 AV_ZERO32(&near_mv[0]);
589 AV_ZERO32(&near_mv[1]);
590 AV_ZERO32(&near_mv[2]);
592 /* Process MB on top, left and top-left */
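/* The SWAR negate in the macro below flips both packed 16-bit MV components at once:
   complement, then add 1 to the low 15 bits of each half and XOR the sign bits back in,
   so no carry can spill from one component into the other. */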
593 #define MV_EDGE_CHECK(n)\
595 VP8Macroblock *edge = mb_edge[n];\
596 int edge_ref = edge->ref_frame;\
597 if (edge_ref != VP56_FRAME_CURRENT) {\
598 uint32_t mv = AV_RN32A(&edge->mv);\
600 if (cur_sign_bias != sign_bias[edge_ref]) {\
601 /* SWAR negate of the values in mv. */\
603 mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
605 if (!n || mv != AV_RN32A(&near_mv[idx]))\
606 AV_WN32A(&near_mv[++idx], mv);\
607 cnt[idx] += 1 + (n != 2);\
609 cnt[CNT_ZERO] += 1 + (n != 2);\
617 mb->partitioning = VP8_SPLITMVMODE_NONE;
618 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
619 mb->mode = VP8_MVMODE_MV;
621 /* If we have three distinct MVs, merge first and last if they're the same */
622 if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
623 cnt[CNT_NEAREST] += 1;
625 /* Swap near and nearest if necessary */
626 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
627 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
628 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
631 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
632 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
634 /* Choose the best mv out of 0,0 and the nearest mv */
635 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
636 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
637 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
638 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
640 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
641 mb->mode = VP8_MVMODE_SPLIT;
642 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
644 mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
645 mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
649 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
653 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
657 mb->mode = VP8_MVMODE_ZERO;
663 static av_always_inline
664 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
665 int mb_x, int keyframe, int layout)
667 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
670 VP8Macroblock *mb_top = mb - s->mb_width - 1;
671 memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
676 uint8_t* const left = s->intra4x4_pred_mode_left;
678 top = mb->intra4x4_pred_mode_top;
680 top = s->intra4x4_pred_mode_top + 4 * mb_x;
681 for (y = 0; y < 4; y++) {
682 for (x = 0; x < 4; x++) {
684 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
685 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
686 left[y] = top[x] = *intra4x4;
692 for (i = 0; i < 16; i++)
693 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
697 static av_always_inline
698 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
699 uint8_t *segment, uint8_t *ref, int layout)
701 VP56RangeCoder *c = &s->c;
703 if (s->segmentation.update_map)
704 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
705 else if (s->segmentation.enabled)
706 *segment = ref ? *ref : *segment;
707 mb->segment = *segment;
709 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
712 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
714 if (mb->mode == MODE_I4x4) {
715 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
717 const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
718 if (s->mb_layout == 1)
719 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
721 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
722 AV_WN32A( s->intra4x4_pred_mode_left, modes);
725 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
726 mb->ref_frame = VP56_FRAME_CURRENT;
727 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
729 if (vp56_rac_get_prob_branchy(c, s->prob->last))
730 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
731 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
733 mb->ref_frame = VP56_FRAME_PREVIOUS;
734 s->ref_count[mb->ref_frame-1]++;
736 // motion vectors, 16.3
737 decode_mvs(s, mb, mb_x, mb_y, layout);
740 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
742 if (mb->mode == MODE_I4x4)
743 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
745 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
746 mb->ref_frame = VP56_FRAME_CURRENT;
747 mb->partitioning = VP8_SPLITMVMODE_NONE;
748 AV_ZERO32(&mb->bmv[0]);
752 #ifndef decode_block_coeffs_internal
754 * @param r arithmetic bitstream reader context
755 * @param block destination for block coefficients
756 * @param probs probabilities to use when reading trees from the bitstream
757 * @param i initial coeff index, 0 unless a separate DC block is coded
758 * @param qmul array holding the dc/ac dequant factor at position 0/1
759 * @return 0 if no coeffs were decoded
760 * otherwise, the index of the last coeff decoded plus one
762 static int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
763 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
764 int i, uint8_t *token_prob, int16_t qmul[2])
766 VP56RangeCoder c = *r;
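/* work on a local copy of the range coder so it can live in registers through the
   coefficient loop; the caller's coder is written back once the block is finished */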
770 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
774 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
776 break; // invalid input; blocks should end with EOB
777 token_prob = probs[i][0];
781 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
783 token_prob = probs[i+1][1];
785 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
786 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
788 coeff += vp56_rac_get_prob(&c, token_prob[5]);
792 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
793 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
794 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
797 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
798 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
800 } else { // DCT_CAT3 and up
801 int a = vp56_rac_get_prob(&c, token_prob[8]);
802 int b = vp56_rac_get_prob(&c, token_prob[9+a]);
803 int cat = (a<<1) + b;
804 coeff = 3 + (8<<cat);
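/* cat values 0-3 here correspond to DCT_CAT3..DCT_CAT6, giving base magnitudes of
   11, 19, 35 and 67 before the extra bits are read below */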
805 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
808 token_prob = probs[i+1][2];
810 block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
819 * @param c arithmetic bitstream reader context
820 * @param block destination for block coefficients
821 * @param probs probabilities to use when reading trees from the bitstream
822 * @param i initial coeff index, 0 unless a separate DC block is coded
823 * @param zero_nhood the initial prediction context for number of surrounding
824 * all-zero blocks (only left/top, so 0-2)
825 * @param qmul array holding the dc/ac dequant factor at position 0/1
826 * @return 0 if no coeffs were decoded
827 * otherwise, the index of the last coeff decoded plus one
829 static av_always_inline
830 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
831 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
832 int i, int zero_nhood, int16_t qmul[2])
834 uint8_t *token_prob = probs[i][zero_nhood];
835 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
837 return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
840 static av_always_inline
841 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
842 uint8_t t_nnz[9], uint8_t l_nnz[9])
844 int i, x, y, luma_start = 0, luma_ctx = 3;
845 int nnz_pred, nnz, nnz_total = 0;
846 int segment = mb->segment;
849 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
850 nnz_pred = t_nnz[8] + l_nnz[8];
852 // decode DC values and do hadamard
853 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
854 s->qmat[segment].luma_dc_qmul);
855 l_nnz[8] = t_nnz[8] = !!nnz;
860 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
862 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
869 for (y = 0; y < 4; y++)
870 for (x = 0; x < 4; x++) {
871 nnz_pred = l_nnz[y] + t_nnz[x];
872 nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
873 nnz_pred, s->qmat[segment].luma_qmul);
874 // nnz+block_dc may be one more than the actual last index, but we don't care
875 td->non_zero_count_cache[y][x] = nnz + block_dc;
876 t_nnz[x] = l_nnz[y] = !!nnz;
881 // TODO: what to do about dimensions? 2nd dim for luma is x,
882 // but for chroma it's (y<<1)|x
883 for (i = 4; i < 6; i++)
884 for (y = 0; y < 2; y++)
885 for (x = 0; x < 2; x++) {
886 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
887 nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
888 nnz_pred, s->qmat[segment].chroma_qmul);
889 td->non_zero_count_cache[i][(y<<1)+x] = nnz;
890 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
894 // if there were no coded coeffs despite the macroblock not being marked skip,
895 // we MUST not do the inner loop filter and should not do IDCT
896 // Since skip isn't used for bitstream prediction, just manually set it.
901 static av_always_inline
902 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
903 int linesize, int uvlinesize, int simple)
905 AV_COPY128(top_border, src_y + 15*linesize);
907 AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
908 AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
912 static av_always_inline
913 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
914 int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
915 int simple, int xchg)
917 uint8_t *top_border_m1 = top_border-32; // for TL prediction
919 src_cb -= uvlinesize;
920 src_cr -= uvlinesize;
922 #define XCHG(a,b,xchg) do { \
923 if (xchg) AV_SWAP64(b,a); \
924 else AV_COPY64(b,a); \
927 XCHG(top_border_m1+8, src_y-8, xchg);
928 XCHG(top_border, src_y, xchg);
929 XCHG(top_border+8, src_y+8, 1);
930 if (mb_x < mb_width-1)
931 XCHG(top_border+32, src_y+16, 1);
933 // only copy chroma for normal loop filter
934 // or to initialize the top row to 127
935 if (!simple || !mb_y) {
936 XCHG(top_border_m1+16, src_cb-8, xchg);
937 XCHG(top_border_m1+24, src_cr-8, xchg);
938 XCHG(top_border+16, src_cb, 1);
939 XCHG(top_border+24, src_cr, 1);
943 static av_always_inline
944 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
947 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
949 return mb_y ? mode : LEFT_DC_PRED8x8;
953 static av_always_inline
954 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
957 return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
959 return mb_y ? mode : HOR_PRED8x8;
963 static av_always_inline
964 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
966 if (mode == DC_PRED8x8) {
967 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
973 static av_always_inline
974 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
978 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
980 return !mb_y ? DC_127_PRED8x8 : mode;
982 return !mb_x ? DC_129_PRED8x8 : mode;
983 case PLANE_PRED8x8 /*TM*/:
984 return check_tm_pred8x8_mode(mode, mb_x, mb_y);
989 static av_always_inline
990 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
993 return mb_y ? VERT_VP8_PRED : DC_129_PRED;
995 return mb_y ? mode : HOR_VP8_PRED;
999 static av_always_inline
1000 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
1004 if (!mb_x && mb_y) {
1009 case DIAG_DOWN_LEFT_PRED:
1010 case VERT_LEFT_PRED:
1011 return !mb_y ? DC_127_PRED : mode;
1019 return !mb_x ? DC_129_PRED : mode;
1021 return check_tm_pred4x4_mode(mode, mb_x, mb_y);
1022 case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
1023 case DIAG_DOWN_RIGHT_PRED:
1024 case VERT_RIGHT_PRED:
1033 static av_always_inline
1034 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1035 VP8Macroblock *mb, int mb_x, int mb_y)
1037 AVCodecContext *avctx = s->avctx;
1038 int x, y, mode, nnz;
1041 // for the first row, we need to run xchg_mb_border to init the top edge to 127
1042 // otherwise, skip it if we aren't going to deblock
1043 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1044 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1045 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1046 s->filter.simple, 1);
1048 if (mb->mode < MODE_I4x4) {
1049 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
1050 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
1052 mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
1054 s->hpc.pred16x16[mode](dst[0], s->linesize);
1056 uint8_t *ptr = dst[0];
1057 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1058 uint8_t tr_top[4] = { 127, 127, 127, 127 };
1060 // all blocks on the right edge of the macroblock use the bottom edge of
1061 // the top macroblock for their topright edge
1062 uint8_t *tr_right = ptr - s->linesize + 16;
1064 // if we're on the right edge of the frame, said edge is extended
1065 // from the top macroblock
1066 if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
1067 mb_x == s->mb_width-1) {
1068 tr = tr_right[-1]*0x01010101u;
1069 tr_right = (uint8_t *)&tr;
1073 AV_ZERO128(td->non_zero_count_cache);
1075 for (y = 0; y < 4; y++) {
1076 uint8_t *topright = ptr + 4 - s->linesize;
1077 for (x = 0; x < 4; x++) {
1078 int copy = 0, linesize = s->linesize;
1079 uint8_t *dst = ptr+4*x;
1080 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
1082 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
1085 topright = tr_right;
1087 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
1088 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
1090 dst = copy_dst + 12;
1094 AV_WN32A(copy_dst+4, 127U * 0x01010101U);
1096 AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
1100 copy_dst[3] = ptr[4*x-s->linesize-1];
1107 copy_dst[35] = 129U;
1109 copy_dst[11] = ptr[4*x -1];
1110 copy_dst[19] = ptr[4*x+s->linesize -1];
1111 copy_dst[27] = ptr[4*x+s->linesize*2-1];
1112 copy_dst[35] = ptr[4*x+s->linesize*3-1];
1118 s->hpc.pred4x4[mode](dst, topright, linesize);
1120 AV_COPY32(ptr+4*x , copy_dst+12);
1121 AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
1122 AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
1123 AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
1126 nnz = td->non_zero_count_cache[y][x];
1129 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
1131 s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
1136 ptr += 4*s->linesize;
1141 if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1142 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
1144 mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
1146 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1147 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1149 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1150 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1151 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1152 s->filter.simple, 0);
1155 static const uint8_t subpel_idx[3][8] = {
1156 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1157 // also function pointer index
1158 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1159 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
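/* e.g. a luma mv->x of 1 (quarter-pel) gives an eighth-pel phase of 2, so the six-tap
   filter needs 2 extra columns to the left of the block and 3 to the right (5 in total) */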
1165 * @param s VP8 decoding context
1166 * @param dst target buffer for block data at block position
1167 * @param ref reference picture buffer at origin (0, 0)
1168 * @param mv motion vector (relative to block position) to get pixel data from
1169 * @param x_off horizontal position of block from origin (0, 0)
1170 * @param y_off vertical position of block from origin (0, 0)
1171 * @param block_w width of block (16, 8 or 4)
1172 * @param block_h height of block (always same as block_w)
1173 * @param width width of src/dst plane data
1174 * @param height height of src/dst plane data
1175 * @param linesize size of a single line of plane data, including padding
1176 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1178 static av_always_inline
1179 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1180 ThreadFrame *ref, const VP56mv *mv,
1181 int x_off, int y_off, int block_w, int block_h,
1182 int width, int height, int linesize,
1183 vp8_mc_func mc_func[3][3])
1185 uint8_t *src = ref->f->data[0];
1189 int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1190 int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
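/* luma MVs are in quarter-pel units: mv >> 2 below is the integer offset, and the
   doubled-and-masked value above is the eighth-pel phase used to pick the filter */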
1192 x_off += mv->x >> 2;
1193 y_off += mv->y >> 2;
1196 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1197 src += y_off * linesize + x_off;
1198 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1199 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1200 s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
1201 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1202 x_off - mx_idx, y_off - my_idx, width, height);
1203 src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1205 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
1207 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1208 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
1213 * chroma MC function
1215 * @param s VP8 decoding context
1216 * @param dst1 target buffer for block data at block position (U plane)
1217 * @param dst2 target buffer for block data at block position (V plane)
1218 * @param ref reference picture buffer at origin (0, 0)
1219 * @param mv motion vector (relative to block position) to get pixel data from
1220 * @param x_off horizontal position of block from origin (0, 0)
1221 * @param y_off vertical position of block from origin (0, 0)
1222 * @param block_w width of block (16, 8 or 4)
1223 * @param block_h height of block (always same as block_w)
1224 * @param width width of src/dst plane data
1225 * @param height height of src/dst plane data
1226 * @param linesize size of a single line of plane data, including padding
1227 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1229 static av_always_inline
1230 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
1231 ThreadFrame *ref, const VP56mv *mv, int x_off, int y_off,
1232 int block_w, int block_h, int width, int height, int linesize,
1233 vp8_mc_func mc_func[3][3])
1235 uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1238 int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1239 int my = mv->y&7, my_idx = subpel_idx[0][my];
1241 x_off += mv->x >> 3;
1242 y_off += mv->y >> 3;
1245 src1 += y_off * linesize + x_off;
1246 src2 += y_off * linesize + x_off;
1247 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1248 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1249 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1250 s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
1251 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1252 x_off - mx_idx, y_off - my_idx, width, height);
1253 src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1254 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1256 s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
1257 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1258 x_off - mx_idx, y_off - my_idx, width, height);
1259 src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1260 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1262 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1263 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1266 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1267 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1268 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1272 static av_always_inline
1273 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1274 ThreadFrame *ref_frame, int x_off, int y_off,
1275 int bx_off, int by_off,
1276 int block_w, int block_h,
1277 int width, int height, VP56mv *mv)
1282 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1283 ref_frame, mv, x_off + bx_off, y_off + by_off,
1284 block_w, block_h, width, height, s->linesize,
1285 s->put_pixels_tab[block_w == 8]);
1288 if (s->profile == 3) {
1292 x_off >>= 1; y_off >>= 1;
1293 bx_off >>= 1; by_off >>= 1;
1294 width >>= 1; height >>= 1;
1295 block_w >>= 1; block_h >>= 1;
1296 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1297 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1298 &uvmv, x_off + bx_off, y_off + by_off,
1299 block_w, block_h, width, height, s->uvlinesize,
1300 s->put_pixels_tab[1 + (block_w == 4)]);
1303 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1304 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1305 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1307 /* Don't prefetch refs that haven't been used very often this frame. */
1308 if (s->ref_count[ref-1] > (mb_xy >> 5)) {
1309 int x_off = mb_x << 4, y_off = mb_y << 4;
1310 int mx = (mb->mv.x>>2) + x_off + 8;
1311 int my = (mb->mv.y>>2) + y_off;
1312 uint8_t **src= s->framep[ref]->tf.f->data;
1313 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1314 /* For threading, a ff_thread_await_progress here might be useful, but
1315 * it actually slows down the decoder. Since a bad prefetch doesn't
1316 * generate bad decoder output, we don't run it here. */
1317 s->vdsp.prefetch(src[0]+off, s->linesize, 4);
1318 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1319 s->vdsp.prefetch(src[1]+off, src[2]-src[1], 2);
1324 * Apply motion vectors to prediction buffer, chapter 18.
1326 static av_always_inline
1327 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1328 VP8Macroblock *mb, int mb_x, int mb_y)
1330 int x_off = mb_x << 4, y_off = mb_y << 4;
1331 int width = 16*s->mb_width, height = 16*s->mb_height;
1332 ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1333 VP56mv *bmv = mb->bmv;
1335 switch (mb->partitioning) {
1336 case VP8_SPLITMVMODE_NONE:
1337 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1338 0, 0, 16, 16, width, height, &mb->mv);
1340 case VP8_SPLITMVMODE_4x4: {
1345 for (y = 0; y < 4; y++) {
1346 for (x = 0; x < 4; x++) {
1347 vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
1349 4*x + x_off, 4*y + y_off, 4, 4,
1350 width, height, s->linesize,
1351 s->put_pixels_tab[2]);
1356 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1357 for (y = 0; y < 2; y++) {
1358 for (x = 0; x < 2; x++) {
1359 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1360 mb->bmv[ 2*y * 4 + 2*x+1].x +
1361 mb->bmv[(2*y+1) * 4 + 2*x ].x +
1362 mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1363 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1364 mb->bmv[ 2*y * 4 + 2*x+1].y +
1365 mb->bmv[(2*y+1) * 4 + 2*x ].y +
1366 mb->bmv[(2*y+1) * 4 + 2*x+1].y;
1367 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1368 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
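/* average of the four luma MVs, rounded to nearest with halves away from zero:
   the >> (INT_BIT-1) term is -1 for negative sums and 0 otherwise */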
1369 if (s->profile == 3) {
1373 vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
1374 dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1375 4*x + x_off, 4*y + y_off, 4, 4,
1376 width, height, s->uvlinesize,
1377 s->put_pixels_tab[2]);
1382 case VP8_SPLITMVMODE_16x8:
1383 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1384 0, 0, 16, 8, width, height, &bmv[0]);
1385 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1386 0, 8, 16, 8, width, height, &bmv[1]);
1388 case VP8_SPLITMVMODE_8x16:
1389 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1390 0, 0, 8, 16, width, height, &bmv[0]);
1391 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1392 8, 0, 8, 16, width, height, &bmv[1]);
1394 case VP8_SPLITMVMODE_8x8:
1395 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1396 0, 0, 8, 8, width, height, &bmv[0]);
1397 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1398 8, 0, 8, 8, width, height, &bmv[1]);
1399 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1400 0, 8, 8, 8, width, height, &bmv[2]);
1401 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1402 8, 8, 8, 8, width, height, &bmv[3]);
1407 static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
1408 uint8_t *dst[3], VP8Macroblock *mb)
1412 if (mb->mode != MODE_I4x4) {
1413 uint8_t *y_dst = dst[0];
1414 for (y = 0; y < 4; y++) {
1415 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
1417 if (nnz4&~0x01010101) {
1418 for (x = 0; x < 4; x++) {
1419 if ((uint8_t)nnz4 == 1)
1420 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
1421 else if((uint8_t)nnz4 > 1)
1422 s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
1428 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1431 y_dst += 4*s->linesize;
1435 for (ch = 0; ch < 2; ch++) {
1436 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
1438 uint8_t *ch_dst = dst[1+ch];
1439 if (nnz4&~0x01010101) {
1440 for (y = 0; y < 2; y++) {
1441 for (x = 0; x < 2; x++) {
1442 if ((uint8_t)nnz4 == 1)
1443 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1444 else if((uint8_t)nnz4 > 1)
1445 s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1448 goto chroma_idct_end;
1450 ch_dst += 4*s->uvlinesize;
1453 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
1460 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
1462 int interior_limit, filter_level;
1464 if (s->segmentation.enabled) {
1465 filter_level = s->segmentation.filter_level[mb->segment];
1466 if (!s->segmentation.absolute_vals)
1467 filter_level += s->filter.level;
1469 filter_level = s->filter.level;
1471 if (s->lf_delta.enabled) {
1472 filter_level += s->lf_delta.ref[mb->ref_frame];
1473 filter_level += s->lf_delta.mode[mb->mode];
1476 filter_level = av_clip_uintp2(filter_level, 6);
1478 interior_limit = filter_level;
1479 if (s->filter.sharpness) {
1480 interior_limit >>= (s->filter.sharpness + 3) >> 2;
1481 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1483 interior_limit = FFMAX(interior_limit, 1);
1485 f->filter_level = filter_level;
1486 f->inner_limit = interior_limit;
1487 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
1490 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
1492 int mbedge_lim, bedge_lim, hev_thresh;
1493 int filter_level = f->filter_level;
1494 int inner_limit = f->inner_limit;
1495 int inner_filter = f->inner_filter;
1496 int linesize = s->linesize;
1497 int uvlinesize = s->uvlinesize;
1498 static const uint8_t hev_thresh_lut[2][64] = {
1499 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1500 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1501 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1503 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1504 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1505 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1512 bedge_lim = 2*filter_level + inner_limit;
1513 mbedge_lim = bedge_lim + 4;
1515 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
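/* the lut maps the filter level to a high-edge-variance threshold of 0-3;
   key frames (second row) use slightly lower thresholds */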
1518 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
1519 mbedge_lim, inner_limit, hev_thresh);
1520 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
1521 mbedge_lim, inner_limit, hev_thresh);
1525 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
1526 inner_limit, hev_thresh);
1527 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
1528 inner_limit, hev_thresh);
1529 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
1530 inner_limit, hev_thresh);
1531 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
1532 uvlinesize, bedge_lim,
1533 inner_limit, hev_thresh);
1537 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
1538 mbedge_lim, inner_limit, hev_thresh);
1539 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
1540 mbedge_lim, inner_limit, hev_thresh);
1544 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
1545 linesize, bedge_lim,
1546 inner_limit, hev_thresh);
1547 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
1548 linesize, bedge_lim,
1549 inner_limit, hev_thresh);
1550 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
1551 linesize, bedge_lim,
1552 inner_limit, hev_thresh);
1553 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
1554 dst[2] + 4 * uvlinesize,
1555 uvlinesize, bedge_lim,
1556 inner_limit, hev_thresh);
1560 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
1562 int mbedge_lim, bedge_lim;
1563 int filter_level = f->filter_level;
1564 int inner_limit = f->inner_limit;
1565 int inner_filter = f->inner_filter;
1566 int linesize = s->linesize;
1571 bedge_lim = 2*filter_level + inner_limit;
1572 mbedge_lim = bedge_lim + 4;
1575 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
1577 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1578 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1579 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
1583 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
1585 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1586 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1587 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
1591 #define MARGIN (16 << 2)
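/* MVs are clamped to the frame area plus a 16-pixel margin on each side;
   mv_min/mv_max below are kept in quarter-pel units, hence the << 2 */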
1592 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
1593 VP8Frame *prev_frame)
1595 VP8Context *s = avctx->priv_data;
1598 s->mv_min.y = -MARGIN;
1599 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1600 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
1601 VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1602 int mb_xy = mb_y*s->mb_width;
1604 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1606 s->mv_min.x = -MARGIN;
1607 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1608 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1610 AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
1611 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
1612 prev_frame && prev_frame->seg_map ?
1613 prev_frame->seg_map->data + mb_xy : NULL, 1);
1623 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
1625 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);\
1626 if (otd->thread_mb_pos < tmp) {\
1627 pthread_mutex_lock(&otd->lock);\
1628 td->wait_mb_pos = tmp;\
1630 if (otd->thread_mb_pos >= tmp)\
1632 pthread_cond_wait(&otd->cond, &otd->lock);\
1634 td->wait_mb_pos = INT_MAX;\
1635 pthread_mutex_unlock(&otd->lock);\
1639 #define update_pos(td, mb_y, mb_x)\
1641 int pos = (mb_y << 16) | (mb_x & 0xFFFF);\
1642 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
1643 int is_null = (next_td == NULL) || (prev_td == NULL);\
1644 int pos_check = (is_null) ? 1 :\
1645 (next_td != td && pos >= next_td->wait_mb_pos) ||\
1646 (prev_td != td && pos >= prev_td->wait_mb_pos);\
1647 td->thread_mb_pos = pos;\
1648 if (sliced_threading && pos_check) {\
1649 pthread_mutex_lock(&td->lock);\
1650 pthread_cond_broadcast(&td->cond);\
1651 pthread_mutex_unlock(&td->lock);\
1655 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
1656 #define update_pos(td, mb_y, mb_x)
1659 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
1660 int jobnr, int threadnr)
1662 VP8Context *s = avctx->priv_data;
1663 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
1664 int mb_y = td->thread_mb_pos>>16;
1665 int i, y, mb_x, mb_xy = mb_y*s->mb_width;
1666 int num_jobs = s->num_jobs;
1667 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
1668 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
1671 curframe->tf.f->data[0] + 16*mb_y*s->linesize,
1672 curframe->tf.f->data[1] + 8*mb_y*s->uvlinesize,
1673 curframe->tf.f->data[2] + 8*mb_y*s->uvlinesize
1675 if (mb_y == 0) prev_td = td;
1676 else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1677 if (mb_y == s->mb_height-1) next_td = td;
1678 else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
1679 if (s->mb_layout == 1)
1680 mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1682 mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1683 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
1684 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1687 memset(td->left_nnz, 0, sizeof(td->left_nnz));
1688 // left edge of 129 for intra prediction
1689 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1690 for (i = 0; i < 3; i++)
1691 for (y = 0; y < 16>>!!i; y++)
1692 dst[i][y*curframe->tf.f->linesize[i]-1] = 129;
1694 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
1698 s->mv_min.x = -MARGIN;
1699 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1701 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1702 // Wait for previous thread to read mb_x+2, and reach mb_y-1.
1703 if (prev_td != td) {
1704 if (threadnr != 0) {
1705 check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
1707 check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
1711 s->vdsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1712 s->vdsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1715 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
1716 prev_frame && prev_frame->seg_map ?
1717 prev_frame->seg_map->data + mb_xy : NULL, 0);
1719 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
1722 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);
1724 if (mb->mode <= MODE_I4x4)
1725 intra_predict(s, td, dst, mb, mb_x, mb_y);
1727 inter_predict(s, td, dst, mb, mb_x, mb_y);
1729 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
1732 idct_mb(s, td, dst, mb);
1734 AV_ZERO64(td->left_nnz);
1735 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
1737 // Reset the DC block predictors that would exist if the mb had coefficients
1738 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
1739 td->left_nnz[8] = 0;
1740 s->top_nnz[mb_x][8] = 0;
1744 if (s->deblock_filter)
1745 filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);
1747 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
1748 if (s->filter.simple)
1749 backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1751 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1754 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
1762 if (mb_x == s->mb_width+1) {
1763 update_pos(td, mb_y, s->mb_width+3);
1765 update_pos(td, mb_y, mb_x);
1770 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
1771 int jobnr, int threadnr)
1773 VP8Context *s = avctx->priv_data;
1774 VP8ThreadData *td = &s->thread_data[threadnr];
1775 int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
1776 AVFrame *curframe = s->curframe->tf.f;
1778 VP8ThreadData *prev_td, *next_td;
1780 curframe->data[0] + 16*mb_y*s->linesize,
1781 curframe->data[1] + 8*mb_y*s->uvlinesize,
1782 curframe->data[2] + 8*mb_y*s->uvlinesize
1785 if (s->mb_layout == 1)
1786 mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1788 mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1790 if (mb_y == 0) prev_td = td;
1791 else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1792 if (mb_y == s->mb_height-1) next_td = td;
1793 else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
1795 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
1796 VP8FilterStrength *f = &td->filter_strength[mb_x];
1797 if (prev_td != td) {
1798 check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
1801 if (next_td != &s->thread_data[0]) {
1802 check_thread_pos(td, next_td, mb_x+1, mb_y+1);
1805 if (num_jobs == 1) {
1806 if (s->filter.simple)
1807 backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1809 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1812 if (s->filter.simple)
1813 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
1815 filter_mb(s, dst, f, mb_x, mb_y);
1820 update_pos(td, mb_y, (s->mb_width+3) + mb_x);
1824 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
1825 int jobnr, int threadnr)
1827 VP8Context *s = avctx->priv_data;
1828 VP8ThreadData *td = &s->thread_data[jobnr];
1829 VP8ThreadData *next_td = NULL, *prev_td = NULL;
1830 VP8Frame *curframe = s->curframe;
1831 int mb_y, num_jobs = s->num_jobs;
1832 td->thread_nr = threadnr;
1833 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
1834 if (mb_y >= s->mb_height) break;
1835 td->thread_mb_pos = mb_y<<16;
1836 vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
1837 if (s->deblock_filter)
1838 vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
1839 update_pos(td, mb_y, INT_MAX & 0xFFFF);
1844 if (avctx->active_thread_type == FF_THREAD_FRAME)
1845 ff_thread_report_progress(&curframe->tf, mb_y, 0);
1851 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
1854 VP8Context *s = avctx->priv_data;
1855 int ret, i, referenced, num_jobs;
1856 enum AVDiscard skip_thresh;
1857 VP8Frame *av_uninit(curframe), *prev_frame;
1859 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
1862 prev_frame = s->framep[VP56_FRAME_CURRENT];
1864 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
1865 || s->update_altref == VP56_FRAME_CURRENT;
1867 skip_thresh = !referenced ? AVDISCARD_NONREF :
1868 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
1870 if (avctx->skip_frame >= skip_thresh) {
1872 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
1875 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
1877 // release no longer referenced frames
1878 for (i = 0; i < 5; i++)
1879 if (s->frames[i].tf.f->data[0] &&
1880 &s->frames[i] != prev_frame &&
1881 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1882 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1883 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1884 vp8_release_frame(s, &s->frames[i]);
1886 // find a free buffer
1887 for (i = 0; i < 5; i++)
1888 if (&s->frames[i] != prev_frame &&
1889 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1890 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1891 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1892 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
1896 av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
1899 if (curframe->tf.f->data[0])
1900 vp8_release_frame(s, curframe);
1902 // Given that arithmetic probabilities are updated every frame, it's quite likely
1903 // that the values we have on a random interframe are complete junk if we didn't
1904 // start decode on a keyframe. So just don't display anything rather than junk.
1905 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1906 !s->framep[VP56_FRAME_GOLDEN] ||
1907 !s->framep[VP56_FRAME_GOLDEN2])) {
1908 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1909 ret = AVERROR_INVALIDDATA;
1913 curframe->tf.f->key_frame = s->keyframe;
1914 curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1915 if ((ret = vp8_alloc_frame(s, curframe, referenced))) {
1916 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
1920 // check if golden and altref are swapped
1921 if (s->update_altref != VP56_FRAME_NONE) {
1922 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
1924 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
1926 if (s->update_golden != VP56_FRAME_NONE) {
1927 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
1929 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
1931 if (s->update_last) {
1932 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
1934 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
1936 s->next_framep[VP56_FRAME_CURRENT] = curframe;
1938 ff_thread_finish_setup(avctx);
1940 s->linesize = curframe->tf.f->linesize[0];
1941 s->uvlinesize = curframe->tf.f->linesize[1];
1943 if (!s->thread_data[0].edge_emu_buffer)
1944 for (i = 0; i < MAX_THREADS; i++)
1945 s->thread_data[i].edge_emu_buffer = av_malloc(21*s->linesize);
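/* 21 rows per thread: a 16-line luma block plus up to 5 extra lines
   (the largest subpel_idx[1] entry) needed by six-tap subpel interpolation */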
1947 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1948 /* Zero macroblock structures for top/top-left prediction from outside the frame. */
1950 memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
1951 if (!s->mb_layout && s->keyframe)
1952 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
1954 // top edge of 127 for intra prediction
1955 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1956 s->top_border[0][15] = s->top_border[0][23] = 127;
1957 s->top_border[0][31] = 127;
1958 memset(s->top_border[1], 127, s->mb_width*sizeof(*s->top_border));
1960 memset(s->ref_count, 0, sizeof(s->ref_count));
1963 // Make sure the previous frame has read its segmentation map,
1964 // if we re-use the same map.
1965 if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
1966 ff_thread_await_progress(&prev_frame->tf, 1, 0);
1968 if (s->mb_layout == 1)
1969 vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
1971 if (avctx->active_thread_type == FF_THREAD_FRAME)
1974 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
1975 s->num_jobs = num_jobs;
1976 s->curframe = curframe;
1977 s->prev_frame = prev_frame;
1978 s->mv_min.y = -MARGIN;
1979 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1980 for (i = 0; i < MAX_THREADS; i++) {
1981 s->thread_data[i].thread_mb_pos = 0;
1982 s->thread_data[i].wait_mb_pos = INT_MAX;
1984 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);
1986 ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
1987 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
1990 // if future frames don't use the updated probabilities,
1991 // reset them to the values we saved
1992 if (!s->update_probabilities)
1993 s->prob[0] = s->prob[1];
1995 if (!s->invisible) {
1996 if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
2003 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2007 static av_cold int vp8_decode_free(AVCodecContext *avctx)
2009 VP8Context *s = avctx->priv_data;
2012 vp8_decode_flush_impl(avctx, 1);
2013 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2014 av_frame_free(&s->frames[i].tf.f);
2019 static av_cold int vp8_init_frames(VP8Context *s)
2022 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2023 s->frames[i].tf.f = av_frame_alloc();
2024 if (!s->frames[i].tf.f)
2025 return AVERROR(ENOMEM);
2030 static av_cold int vp8_decode_init(AVCodecContext *avctx)
2032 VP8Context *s = avctx->priv_data;
2036 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2037 avctx->internal->allocate_progress = 1;
2039 ff_videodsp_init(&s->vdsp, 8);
2040 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2041 ff_vp8dsp_init(&s->vp8dsp);
2043 if ((ret = vp8_init_frames(s)) < 0) {
2044 vp8_decode_free(avctx);
2051 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2053 VP8Context *s = avctx->priv_data;
2058 if ((ret = vp8_init_frames(s)) < 0) {
2059 vp8_decode_free(avctx);
2066 #define REBASE(pic) \
2067 pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
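/* REBASE maps a frame pointer from the source context's frames[] array onto the
   corresponding entry in this context's array */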
2069 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
2071 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2074 if (s->macroblocks_base &&
2075 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2077 s->mb_width = s_src->mb_width;
2078 s->mb_height = s_src->mb_height;
2081 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2082 s->segmentation = s_src->segmentation;
2083 s->lf_delta = s_src->lf_delta;
2084 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2086 for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2087 if (s_src->frames[i].tf.f->data[0]) {
2088 int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
2094 s->framep[0] = REBASE(s_src->next_framep[0]);
2095 s->framep[1] = REBASE(s_src->next_framep[1]);
2096 s->framep[2] = REBASE(s_src->next_framep[2]);
2097 s->framep[3] = REBASE(s_src->next_framep[3]);
2102 AVCodec ff_vp8_decoder = {
2104 .type = AVMEDIA_TYPE_VIDEO,
2105 .id = AV_CODEC_ID_VP8,
2106 .priv_data_size = sizeof(VP8Context),
2107 .init = vp8_decode_init,
2108 .close = vp8_decode_free,
2109 .decode = vp8_decode_frame,
2110 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2111 .flush = vp8_decode_flush,
2112 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2113 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2114 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),