2 * VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Jason Garrett-Glaser
7 * Copyright (C) 2012 Daniel Kang
9 * This file is part of Libav.
11 * Libav is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * Libav is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with Libav; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/imgutils.h"
31 #include "rectangle.h"
/**
 * Release every heap buffer owned by the decoder context: per-thread
 * synchronization primitives and scratch buffers first, then the
 * context-wide macroblock / prediction / NNZ / border arrays.
 * Safe to call on a partially-initialized context (av_freep on NULL is a no-op).
 */
38 static void free_buffers(VP8Context *s)
42     for (i = 0; i < MAX_THREADS; i++) {
// Destroy each thread's sync primitives before freeing its scratch buffers.
44             pthread_cond_destroy(&s->thread_data[i].cond);
45             pthread_mutex_destroy(&s->thread_data[i].lock);
47         av_freep(&s->thread_data[i].filter_strength);
48         av_freep(&s->thread_data[i].edge_emu_buffer);
50     av_freep(&s->thread_data);
51     av_freep(&s->macroblocks_base);
52     av_freep(&s->intra4x4_pred_mode_top);
53     av_freep(&s->top_nnz);
54     av_freep(&s->top_border);
// macroblocks is an alias into macroblocks_base (set in update_dimensions),
// so it must be cleared once the backing allocation is gone.
56     s->macroblocks = NULL;
/**
 * Allocate the frame buffer and the per-MB segmentation map for one VP8Frame.
 * @param ref  nonzero if the frame may be used as a reference
 *             (passes AV_GET_BUFFER_FLAG_REF to the buffer allocator)
 * @return 0 on success, a negative AVERROR on failure; on seg_map allocation
 *         failure the already-acquired frame buffer is released again so the
 *         caller never sees a half-initialized frame.
 */
59 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
62     if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
63                                     ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
// One segment-id byte per macroblock, zero-initialized.
65     if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
66         ff_thread_release_buffer(s->avctx, &f->tf);
67         return AVERROR(ENOMEM);
/**
 * Release both resources owned by a VP8Frame: the segmentation-map buffer
 * reference and the underlying thread-safe frame buffer.
 */
72 static void vp8_release_frame(VP8Context *s, VP8Frame *f)
74     av_buffer_unref(&f->seg_map);
75     ff_thread_release_buffer(s->avctx, &f->tf);
/**
 * Make dst a new reference to src (frame buffer plus segmentation map).
 * dst is released first, so it may already hold a previous frame.
 * On failure dst is left fully released; returns 0 or a negative AVERROR.
 */
78 static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
82     vp8_release_frame(s, dst);
84     if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
// NOTE(review): the visible condition also checks src->seg_map presence on
// the line elided here — ref the seg_map only when the source has one.
87         !(dst->seg_map = av_buffer_ref(src->seg_map))) {
// Undo the partial ref so dst is never left half-populated.
88         vp8_release_frame(s, dst);
89         return AVERROR(ENOMEM);
/**
 * Flush the decoder: release all internal frames and forget the frame
 * pointer map.
 * @param free_mem  when nonzero, additionally free long-lived context
 *                  buffers (used on close / dimension change; the freeing
 *                  itself happens in code elided from this view).
 */
96 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
98     VP8Context *s = avctx->priv_data;
101     for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
102         vp8_release_frame(s, &s->frames[i]);
// framep entries pointed into s->frames; all of those were just released.
103     memset(s->framep, 0, sizeof(s->framep));
/* Public flush callback: release frames but keep context buffers allocated. */
109 static void vp8_decode_flush(AVCodecContext *avctx)
111     vp8_decode_flush_impl(avctx, 0);
/**
 * (Re)configure the decoder for a new coded frame size: on a size change,
 * flush and free the old buffers, set the new dimensions, then allocate all
 * per-width/height working arrays. The macroblock layout differs between
 * slice-threaded and frame-threaded/single-threaded operation.
 * @return 0 on success, negative AVERROR on allocation/dimension failure.
 */
114 static int update_dimensions(VP8Context *s, int width, int height)
116     AVCodecContext *avctx = s->avctx;
119     if (width != s->avctx->width ||
120         height != s->avctx->height) {
// Size changed: old buffers are sized for the previous dimensions.
121         vp8_decode_flush_impl(s->avctx, 1);
123         ret = ff_set_dimensions(s->avctx, width, height);
// Macroblocks are 16x16; round the coded size up.
128     s->mb_width  = (s->avctx->coded_width +15) / 16;
129     s->mb_height = (s->avctx->coded_height+15) / 16;
// Sliced layout is only used when slice threading is active AND more than
// one thread can actually work (bounded by the coeff partition count).
131     s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
132     if (!s->mb_layout) { // Frame threading and one thread
133         s->macroblocks_base        = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
134         s->intra4x4_pred_mode_top  = av_mallocz(s->mb_width*4);
136     else // Sliced threading
// Full 2D MB array with a one-MB guard border on every side.
137         s->macroblocks_base = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
138     s->top_nnz           = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
139     s->top_border        = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
140     s->thread_data       = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));
142     for (i = 0; i < MAX_THREADS; i++) {
143         s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
145         pthread_mutex_init(&s->thread_data[i].lock, NULL);
146         pthread_cond_init(&s->thread_data[i].cond, NULL);
// intra4x4_pred_mode_top is only allocated (and thus only required) in the
// non-sliced layout, hence the combined check.
150     if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
151         (!s->intra4x4_pred_mode_top && !s->mb_layout))
152         return AVERROR(ENOMEM);
// Skip the first (guard) entry of the base array.
154     s->macroblocks        = s->macroblocks_base + 1;
/**
 * Parse the segmentation block of the frame header (RFC 6386 section 9.3):
 * per-segment quantizer and loop-filter deltas (or absolute values), and the
 * tree probabilities used to decode the per-MB segment-id map.
 */
159 static void parse_segment_info(VP8Context *s)
161     VP56RangeCoder *c = &s->c;
164     s->segmentation.update_map = vp8_rac_get(c);
166     if (vp8_rac_get(c)) { // update segment feature data
// absolute_vals: values below replace the base quant/filter level instead
// of being added as deltas.
167         s->segmentation.absolute_vals = vp8_rac_get(c);
169         for (i = 0; i < 4; i++)
170             s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);
172         for (i = 0; i < 4; i++)
173             s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
175     if (s->segmentation.update_map)
// 255 marks "not transmitted": keep the implicit default probability.
176         for (i = 0; i < 3; i++)
177             s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
/**
 * Parse loop-filter delta updates (RFC 6386 section 9.4): one signed delta
 * per reference frame and one per prediction mode, each coded as a 6-bit
 * magnitude followed by a sign decision.
 */
180 static void update_lf_deltas(VP8Context *s)
182     VP56RangeCoder *c = &s->c;
185     for (i = 0; i < 4; i++) {
186         if (vp8_rac_get(c)) {
187             s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
// Sign bit elided from this view: negate when set.
190                 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
// Mode deltas are indexed from MODE_I4x4 through VP8_MVMODE_SPLIT.
194     for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
195         if (vp8_rac_get(c)) {
196             s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
199                 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
/**
 * Parse the DCT coefficient partition layout (RFC 6386 section 9.5).
 * The partition count (1, 2, 4 or 8) comes from the header range coder;
 * each partition except the last has an explicit 24-bit little-endian size
 * stored at the start of the partition data. A range decoder is initialized
 * over each partition; the final one consumes the remaining buffer.
 * @return negative on invalid/overflowing partition sizes (exact error path
 *         elided from this view), 0 on success.
 */
204 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
206     const uint8_t *sizes = buf;
209     s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
// Skip over the size table (3 bytes per partition, last partition implicit).
211     buf      += 3*(s->num_coeff_partitions-1);
212     buf_size -= 3*(s->num_coeff_partitions-1);
216     for (i = 0; i < s->num_coeff_partitions-1; i++) {
217         int size = AV_RL24(sizes + 3*i);
// Reject sizes that run past the end of the packet.
218         if (buf_size - size < 0)
221         ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
// Last partition: everything that is left.
225     ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
/**
 * Parse quantizer indices (RFC 6386 section 9.6) and build the per-segment
 * dequantization tables. A base AC index plus five signed deltas select
 * entries from the DC/AC lookup tables for luma, the second-order (Y2)
 * block, and chroma.
 */
230 static void get_quants(VP8Context *s)
232     VP56RangeCoder *c = &s->c;
235     int yac_qi     = vp8_rac_get_uint(c, 7);
236     int ydc_delta  = vp8_rac_get_sint(c, 4);
237     int y2dc_delta = vp8_rac_get_sint(c, 4);
238     int y2ac_delta = vp8_rac_get_sint(c, 4);
239     int uvdc_delta = vp8_rac_get_sint(c, 4);
240     int uvac_delta = vp8_rac_get_sint(c, 4);
242     for (i = 0; i < 4; i++) {
243         if (s->segmentation.enabled) {
244             base_qi = s->segmentation.base_quant[i];
// When not absolute, the per-segment value is a delta on top of yac_qi
// (the addition itself is elided from this view).
245             if (!s->segmentation.absolute_vals)
// Every index is clamped to the 7-bit table range before lookup.
250         s->qmat[i].luma_qmul[0]    =           vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
251         s->qmat[i].luma_qmul[1]    =           vp8_ac_qlookup[av_clip_uintp2(base_qi             , 7)];
252         s->qmat[i].luma_dc_qmul[0] =       2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
253         /* 101581>>16 is equivalent to 155/100 */
254         s->qmat[i].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
255         s->qmat[i].chroma_qmul[0]  =           vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
256         s->qmat[i].chroma_qmul[1]  =           vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
// Spec-mandated floors/ceilings for Y2 AC and chroma DC multipliers.
258         s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
259         s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
264  * Determine which buffers golden and altref should be updated with after this frame.
265  * The spec isn't clear here, so I'm going by my understanding of what libvpx does
267  * Intra frames update all 3 references
268  * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
269  * If the update (golden|altref) flag is set, it's updated with the current frame
270  * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
271  * If the flag is not set, the number read means:
273  *      1: VP56_FRAME_PREVIOUS
274  *      2: update golden with altref, or update altref with golden
276 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
278     VP56RangeCoder *c = &s->c;
// update flag set (or keyframe): refresh with the frame being decoded.
281         return VP56_FRAME_CURRENT;
283     switch (vp8_rac_get_uint(c, 2)) {
285         return VP56_FRAME_PREVIOUS;
// Cross-copy: golden takes altref, altref takes golden.
287         return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
// 0 (and default): no update at all.
289     return VP56_FRAME_NONE;
/**
 * Read the golden/altref refresh flags for an inter frame and resolve which
 * source each reference slot will be updated from (see ref_to_update()).
 */
292 static void update_refs(VP8Context *s)
294     VP56RangeCoder *c = &s->c;
296     int update_golden = vp8_rac_get(c);
297     int update_altref = vp8_rac_get(c);
299     s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
300     s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
/**
 * Parse the complete VP8 frame header (RFC 6386 section 9): the 3-byte
 * uncompressed tag, keyframe start code and dimensions, then the range-coded
 * first partition — segmentation, loop-filter, partition layout, quantizers,
 * reference updates and all probability updates.
 * @return 0 on success, negative AVERROR on invalid bitstream data.
 */
303 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
305     VP56RangeCoder *c = &s->c;
306     int header_size, hscale, vscale, i, j, k, l, m, ret;
307     int width  = s->avctx->width;
308     int height = s->avctx->height;
// Uncompressed 3-byte frame tag: keyframe bit, profile, show-frame bit,
// and the size of the first (header) partition.
310     s->keyframe  = !(buf[0] & 1);
311     s->profile   =  (buf[0]>>1) & 7;
312     s->invisible = !(buf[0] & 0x10);
313     header_size  = AV_RL24(buf) >> 5;
318         av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
// Profile 0 uses the 6-tap subpel filters; others use bilinear.
321         memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
322     else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
323         memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
// Keyframes carry 7 extra bytes (start code + dimensions) before the
// header partition.
325     if (header_size > buf_size - 7*s->keyframe) {
326         av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
327         return AVERROR_INVALIDDATA;
// Keyframe start code 0x9d012a (read little-endian).
331         if (AV_RL24(buf) != 0x2a019d) {
332             av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
333             return AVERROR_INVALIDDATA;
335         width  = AV_RL16(buf+3) & 0x3fff;
336         height = AV_RL16(buf+5) & 0x3fff;
// Top two bits of each 16-bit field are the upscaling factors.
337         hscale = buf[4] >> 6;
338         vscale = buf[6] >> 6;
342         if (hscale || vscale)
343             avpriv_request_sample(s->avctx, "Upscaling");
// Keyframes reset everything: all references refresh, and all probability
// tables return to their spec defaults.
345         s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
346         for (i = 0; i < 4; i++)
347             for (j = 0; j < 16; j++)
348                 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
349                        sizeof(s->prob->token[i][j]));
350         memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
351         memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
352         memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
353         memset(&s->segmentation, 0, sizeof(s->segmentation));
354         memset(&s->lf_delta, 0, sizeof(s->lf_delta));
// Start range-decoding the header partition proper.
357     ff_vp56_init_range_decoder(c, buf, header_size);
359     buf_size -= header_size;
363             av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
364         vp8_rac_get(c); // whether we can skip clamping in dsp functions
367     if ((s->segmentation.enabled = vp8_rac_get(c)))
368         parse_segment_info(s);
370         s->segmentation.update_map = 0; // FIXME: move this to some init function?
372     s->filter.simple    = vp8_rac_get(c);
373     s->filter.level     = vp8_rac_get_uint(c, 6);
374     s->filter.sharpness = vp8_rac_get_uint(c, 3);
376     if ((s->lf_delta.enabled = vp8_rac_get(c)))
380     if (setup_partitions(s, buf, buf_size)) {
381         av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
382         return AVERROR_INVALIDDATA;
// (Re)allocate working buffers on the first frame or a size change.
385     if (!s->macroblocks_base || /* first frame */
386         width != s->avctx->width || height != s->avctx->height) {
387         if ((ret = update_dimensions(s, width, height)) < 0)
395         s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
396         s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
399     // if we aren't saving this frame's probabilities for future frames,
400     // make a copy of the current probabilities
401     if (!(s->update_probabilities = vp8_rac_get(c)))
402         s->prob[1] = s->prob[0];
404     s->update_last = s->keyframe || vp8_rac_get(c);
// 13.4: token probability updates, one conditional byte per entry. A single
// transmitted probability is fanned out to every coefficient index that maps
// to the same band.
406     for (i = 0; i < 4; i++)
407         for (j = 0; j < 8; j++)
408             for (k = 0; k < 3; k++)
409                 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
410                     if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
411                         int prob = vp8_rac_get_uint(c, 8);
412                         for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
413                             s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
416     if ((s->mbskip_enabled = vp8_rac_get(c)))
417         s->prob->mbskip = vp8_rac_get_uint(c, 8);
// Inter-frame-only probabilities: intra/last/golden reference selection and
// optional mode probability updates.
420         s->prob->intra  = vp8_rac_get_uint(c, 8);
421         s->prob->last   = vp8_rac_get_uint(c, 8);
422         s->prob->golden = vp8_rac_get_uint(c, 8);
425             for (i = 0; i < 4; i++)
426                 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
428             for (i = 0; i < 3; i++)
429                 s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);
431         // 17.2 MV probability update
432         for (i = 0; i < 2; i++)
433             for (j = 0; j < 19; j++)
434                 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
435                     s->prob->mvc[i][j] = vp8_rac_get_nn(c);
/* Clamp a motion vector to the valid range for the current macroblock row
 * and column (mv_min/mv_max are maintained per-MB by the caller). */
441 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
443     dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
444     dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
448  * Motion vector coding, 17.1.
 * Decodes one signed MV component using the probability table p:
 * either a "large" magnitude built bit-by-bit, or a small magnitude
 * decoded through the tree, followed by a sign decision.
450 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
454     if (vp56_rac_get_prob_branchy(c, p[0])) {
// Large MV: low 3 bits LSB-first, then the upper bits MSB-first.
457         for (i = 0; i < 3; i++)
458             x += vp56_rac_get_prob(c, p[9 + i]) << i;
459         for (i = 9; i > 3; i--)
460             x += vp56_rac_get_prob(c, p[9 + i]) << i;
// Bit 3 is implicit unless the upper bits are all zero.
461         if (!(x & 0xFFF0)  ||  vp56_rac_get_prob(c, p[12]))
// Small MV: walk the short_mv tree (ps advances through p[2..8]).
465         const uint8_t *ps = p+2;
466         bit = vp56_rac_get_prob(c, *ps);
469         bit = vp56_rac_get_prob(c, *ps);
472         x += vp56_rac_get_prob(c, *ps);
// Sign is only coded for nonzero magnitudes.
475     return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
/* Select the sub-MV mode probability set based on whether the left and top
 * neighboring sub-MVs match this block's context (17.1 sub-MV contexts).
 * left/top are the raw 32-bit packed neighbor MVs. */
478 static av_always_inline
479 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
// Context elided here distinguishes left==top from the other cases.
482             return vp8_submv_prob[4-!!left];
484         return vp8_submv_prob[2];
485     return vp8_submv_prob[1-!!left];
489  * Split motion vector prediction, 16.4.
490  * @returns the number of motion vectors parsed (2, 4 or 16)
492 static av_always_inline
493 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
497     VP8Macroblock *top_mb;
498     VP8Macroblock *left_mb = &mb[-1];
// Partition lookup tables for the left/top neighbor macroblocks.
499     const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
501                   *mbsplits_cur, *firstidx;
503     VP56mv *left_mv  = left_mb->bmv;
504     VP56mv *cur_mv   = mb->bmv;
506     if (!layout) // layout is inlined, s->mb_layout is not
// Non-default layout: the top MB is one stride back in the MB array.
509         top_mb = &mb[-s->mb_width-1];
510     mbsplits_top = vp8_mbsplits[top_mb->partitioning];
511     top_mv       = top_mb->bmv;
// Decode the partitioning type via the mbsplit tree: 16x8/8x16, 8x8 or 4x4.
513     if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
514         if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
515             part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
517             part_idx = VP8_SPLITMVMODE_8x8;
520         part_idx = VP8_SPLITMVMODE_4x4;
523     num              = vp8_mbsplit_count[part_idx];
524     mbsplits_cur     = vp8_mbsplits[part_idx],
525     firstidx         = vp8_mbfirstidx[part_idx];
526     mb->partitioning = part_idx;
528     for (n = 0; n < num; n++) {
530         uint32_t left, above;
531         const uint8_t *submv_prob;
// Left neighbor sub-MV: from the left MB for column 0, otherwise from
// this MB's previously decoded sub-blocks. Same scheme vertically.
534             left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
536             left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
538             above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
540             above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
542         submv_prob = get_submv_prob(left, above);
// Sub-MV mode tree: NEW4x4 (explicit residual), ZERO4x4, TOP4x4, LEFT4x4.
544         if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
545             if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
546                 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
547                     mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
548                     mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
550                     AV_ZERO32(&mb->bmv[n]);
553                 AV_WN32A(&mb->bmv[n], above);
556             AV_WN32A(&mb->bmv[n], left);
/**
 * Inter MB motion vector decoding, 16.2/16.3: gather candidate MVs from the
 * top, left and top-left neighbors, count how often each occurs, and use the
 * counts as contexts to decode the MV mode (ZERO / NEAREST / NEAR / NEW /
 * SPLIT) and the final motion vector.
 */
563 static av_always_inline
564 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
566     VP8Macroblock *mb_edge[3] = { 0      /* top */,
// cnt[] doubles as both occurrence counts and tree contexts; the SPLITMV
// slot is recomputed below before the split decision.
569     enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
570     enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
572     int cur_sign_bias = s->sign_bias[mb->ref_frame];
573     int8_t *sign_bias = s->sign_bias;
575     uint8_t cnt[4] = { 0 };
576     VP56RangeCoder *c = &s->c;
578     if (!layout) { // layout is inlined (s->mb_layout is not)
583         mb_edge[0] = mb - s->mb_width-1;
584         mb_edge[2] = mb - s->mb_width-2;
587     AV_ZERO32(&near_mv[0]);
588     AV_ZERO32(&near_mv[1]);
589     AV_ZERO32(&near_mv[2]);
591     /* Process MB on top, left and top-left */
592     #define MV_EDGE_CHECK(n)\
594         VP8Macroblock *edge = mb_edge[n];\
595         int edge_ref = edge->ref_frame;\
596         if (edge_ref != VP56_FRAME_CURRENT) {\
597             uint32_t mv = AV_RN32A(&edge->mv);\
599                 if (cur_sign_bias != sign_bias[edge_ref]) {\
600                     /* SWAR negate of the values in mv. */\
602                     mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
// Distinct MVs get their own slot; repeats bump the existing count.
604                 if (!n || mv != AV_RN32A(&near_mv[idx]))\
605                     AV_WN32A(&near_mv[++idx], mv);\
606                 cnt[idx] += 1 + (n != 2);\
// Top-left (n==2) carries half the weight of top/left.
608                 cnt[CNT_ZERO] += 1 + (n != 2);\
616     mb->partitioning = VP8_SPLITMVMODE_NONE;
617     if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
618         mb->mode = VP8_MVMODE_MV;
620         /* If we have three distinct MVs, merge first and last if they're the same */
621         if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
622             cnt[CNT_NEAREST] += 1;
624         /* Swap near and nearest if necessary */
625         if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
626             FFSWAP(uint8_t,  cnt[CNT_NEAREST], cnt[CNT_NEAR]);
627             FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
630         if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
631             if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
633                 /* Choose the best mv out of 0,0 and the nearest mv */
634                 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
// Rebuild cnt[CNT_SPLITMV] as the split-mode context from how many
// neighbors used SPLIT themselves (top/left weighted double).
635                 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
636                                     (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
637                                     (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
639                 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
640                     mb->mode = VP8_MVMODE_SPLIT;
// mb->mv becomes the last sub-MV, used for neighbor prediction later.
641                     mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
643                     mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
644                     mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
648                 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
652             clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
656         mb->mode = VP8_MVMODE_ZERO;
/**
 * Decode the 16 per-subblock intra prediction modes of an I4x4 macroblock.
 * Keyframes use context-dependent probabilities (top/left neighbor modes);
 * inter frames use a single fixed probability table.
 */
662 static av_always_inline
663 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
664                            int mb_x, int keyframe, int layout)
666     uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
// Layout-dependent: pull the top-neighbor modes from the MB array itself.
669         VP8Macroblock *mb_top = mb - s->mb_width - 1;
670         memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
675         uint8_t* const left = s->intra4x4_pred_mode_left;
677             top = mb->intra4x4_pred_mode_top;
679             top = s->intra4x4_pred_mode_top + 4 * mb_x;
// Keyframe path: each subblock's tree context is (top mode, left mode),
// and the decoded mode becomes the neighbor context for the next ones.
680         for (y = 0; y < 4; y++) {
681             for (x = 0; x < 4; x++) {
683                 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
684                 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
685                 left[y] = top[x] = *intra4x4;
// Inter-frame path: fixed probabilities, no neighbor context.
691         for (i = 0; i < 16; i++)
692             intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
/**
 * Decode everything about one macroblock's mode: segment id, skip flag,
 * intra/inter decision, prediction modes (16x16/4x4 luma + chroma) or
 * reference frame and motion vectors.
 * @param segment  in/out current segment id (carried between MBs)
 * @param ref      optional externally-predicted segment id source
 */
696 static av_always_inline
697 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
698                     uint8_t *segment, uint8_t *ref, int layout)
700     VP56RangeCoder *c = &s->c;
702     if (s->segmentation.update_map)
703         *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
704     else if (s->segmentation.enabled)
// No map update this frame: reuse the previous frame's id if provided.
705         *segment = ref ? *ref : *segment;
706     mb->segment = *segment;
708     mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
// Keyframe path (branch head elided): always intra, fixed mode probs.
711         mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
713         if (mb->mode == MODE_I4x4) {
714             decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
// Non-I4x4: replicate the 16x16 mode into the 4x4 top-prediction row so
// later I4x4 neighbors see a consistent context.
716             const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
717             if (s->mb_layout == 1)
718                 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
720                 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
721             AV_WN32A( s->intra4x4_pred_mode_left, modes);
724         mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
725         mb->ref_frame = VP56_FRAME_CURRENT;
726     } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
// Inter MB: pick the reference frame (last / golden / altref).
728         if (vp56_rac_get_prob_branchy(c, s->prob->last))
729             mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
730                 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
732             mb->ref_frame = VP56_FRAME_PREVIOUS;
733         s->ref_count[mb->ref_frame-1]++;
735         // motion vectors, 16.3
736         decode_mvs(s, mb, mb_x, mb_y, layout);
// Intra MB in an inter frame: frame-adaptive mode probabilities.
739         mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
741         if (mb->mode == MODE_I4x4)
742             decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
744         mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
745         mb->ref_frame        = VP56_FRAME_CURRENT;
746         mb->partitioning     = VP8_SPLITMVMODE_NONE;
747         AV_ZERO32(&mb->bmv[0]);
751 #ifndef decode_block_coeffs_internal
753  * @param r arithmetic bitstream reader context
754  * @param block destination for block coefficients
755  * @param probs probabilities to use when reading trees from the bitstream
756  * @param i initial coeff index, 0 unless a separate DC block is coded
757  * @param qmul array holding the dc/ac dequant factor at position 0/1
758  * @return 0 if no coeffs were decoded
759  *         otherwise, the index of the last coeff decoded plus one
761 static int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
762                                         uint8_t probs[16][3][NUM_DCT_TOKENS-1],
763                                         int i, uint8_t *token_prob, int16_t qmul[2])
// Work on a local copy of the range coder; written back on exit (elided).
765     VP56RangeCoder c = *r;
769         if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
773         if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
775                 break; // invalid input; blocks should end with EOB
// Zero coefficient: next token uses the "previous was zero" context (0).
776             token_prob = probs[i][0];
780         if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
782             token_prob = probs[i+1][1];
784             if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
785                 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
787                     coeff += vp56_rac_get_prob(&c, token_prob[5]);
791                 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
792                     if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
793                         coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
796                         coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
797                         coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
799                 } else {    // DCT_CAT3 and up
800                     int a = vp56_rac_get_prob(&c, token_prob[8]);
801                     int b = vp56_rac_get_prob(&c, token_prob[9+a]);
802                     int cat = (a<<1) + b;
// Category base magnitudes: 11, 19, 35, 67 for CAT3..CAT6.
803                     coeff  = 3 + (8<<cat);
804                     coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
// Large coefficient decoded: context for the next token is "big" (2).
807             token_prob = probs[i+1][2];
// Sign, dequantize (DC multiplier only for index 0) and store in zigzag order.
809         block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
818  * @param c arithmetic bitstream reader context
819  * @param block destination for block coefficients
820  * @param probs probabilities to use when reading trees from the bitstream
821  * @param i initial coeff index, 0 unless a separate DC block is coded
822  * @param zero_nhood the initial prediction context for number of surrounding
823  *                   all-zero blocks (only left/top, so 0-2)
824  * @param qmul array holding the dc/ac dequant factor at position 0/1
825  * @return 0 if no coeffs were decoded
826  *         otherwise, the index of the last coeff decoded plus one
828 static av_always_inline
829 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
830                         uint8_t probs[16][3][NUM_DCT_TOKENS-1],
831                         int i, int zero_nhood, int16_t qmul[2])
833     uint8_t *token_prob = probs[i][zero_nhood];
// Fast path: immediate EOB means an all-zero block, skip the full decoder.
834     if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
836     return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
/**
 * Decode all residual coefficients of one macroblock: optional Y2 (DC/WHT)
 * block, 16 luma 4x4 blocks, then 8 chroma 4x4 blocks. Maintains the
 * top/left non-zero contexts (t_nnz/l_nnz) used for coefficient prediction
 * and fills td->non_zero_count_cache for the IDCT/loop-filter stages.
 */
839 static av_always_inline
840 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
841                       uint8_t t_nnz[9], uint8_t l_nnz[9])
843     int i, x, y, luma_start = 0, luma_ctx = 3;
844     int nnz_pred, nnz, nnz_total = 0;
845     int segment = mb->segment;
// Y2 block exists for all modes except I4x4 and SPLIT.
848     if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
849         nnz_pred = t_nnz[8] + l_nnz[8];
851         // decode DC values and do hadamard
852         nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
853                                   s->qmat[segment].luma_dc_qmul);
854         l_nnz[8] = t_nnz[8] = !!nnz;
// DC-only vs full inverse Walsh-Hadamard transform of the Y2 block.
859             s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
861             s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
// When Y2 is present, luma AC blocks start at coeff 1 and use token set 0
// (luma_start/luma_ctx adjusted in lines elided here).
868     for (y = 0; y < 4; y++)
869         for (x = 0; x < 4; x++) {
870             nnz_pred = l_nnz[y] + t_nnz[x];
871             nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
872                                       nnz_pred, s->qmat[segment].luma_qmul);
873             // nnz+block_dc may be one more than the actual last index, but we don't care
874             td->non_zero_count_cache[y][x] = nnz + block_dc;
875             t_nnz[x] = l_nnz[y] = !!nnz;
880     // TODO: what to do about dimensions? 2nd dim for luma is x,
881     // but for chroma it's (y<<1)|x
882     for (i = 4; i < 6; i++)
883         for (y = 0; y < 2; y++)
884             for (x = 0; x < 2; x++) {
885                 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
886                 nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
887                                           nnz_pred, s->qmat[segment].chroma_qmul);
888                 td->non_zero_count_cache[i][(y<<1)+x] = nnz;
889                 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
893     // if there were no coded coeffs despite the macroblock not being marked skip,
894     // we MUST not do the inner loop filter and should not do IDCT
895     // Since skip isn't used for bitstream prediction, just manually set it.
/**
 * Save the bottom row of this macroblock (16 luma + 8+8 chroma pixels) into
 * the top_border buffer, where the MB row below will read it as its top
 * prediction edge. Chroma is skipped for the simple loop filter (elided
 * condition between the copies).
 */
900 static av_always_inline
901 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
902                       int linesize, int uvlinesize, int simple)
904     AV_COPY128(top_border, src_y + 15*linesize);
906         AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
907         AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
/**
 * Exchange (or copy) the saved top-border pixels with the row above the
 * current macroblock, so intra prediction sees pre-loop-filter neighbor
 * pixels. Called once with xchg=1 before prediction and once with xchg=0
 * after, restoring the border buffer.
 */
911 static av_always_inline
912 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
913                     int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
914                     int simple, int xchg)
916     uint8_t *top_border_m1 = top_border-32;     // for TL prediction
// Step back one row so the macros address the row above the MB.
918     src_cb  -= uvlinesize;
919     src_cr  -= uvlinesize;
921 #define XCHG(a,b,xchg) do {                     \
922         if (xchg) AV_SWAP64(b,a);               \
923         else      AV_COPY64(b,a);               \
926     XCHG(top_border_m1+8, src_y-8, xchg);
927     XCHG(top_border,      src_y,   xchg);
// The right half and top-right are always swapped (needed by prediction
// regardless of the xchg direction).
928     XCHG(top_border+8,    src_y+8, 1);
929     if (mb_x < mb_width-1)
930         XCHG(top_border+32, src_y+16, 1);
932     // only copy chroma for normal loop filter
933     // or to initialize the top row to 127
934     if (!simple || !mb_y) {
935         XCHG(top_border_m1+16, src_cb-8, xchg);
936         XCHG(top_border_m1+24, src_cr-8, xchg);
937         XCHG(top_border+16, src_cb, 1);
938         XCHG(top_border+24, src_cr, 1);
/* Fix up the 16x16/8x8 DC prediction mode at picture edges: without a left
 * and/or top neighbor, fall back to TOP_DC / LEFT_DC / DC_128 variants. */
942 static av_always_inline
943 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
946         return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
948         return mb_y ? mode : LEFT_DC_PRED8x8;
/* Fix up the TM (PLANE) 8x8/16x16 mode at picture edges: degrade to
 * VERT / HOR / DC_129 when the left and/or top neighbor is missing. */
952 static av_always_inline
953 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
956         return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
958         return mb_y ? mode : HOR_PRED8x8;
/* Edge-mode fixup for 16x16/8x8 intra prediction when edge emulation is
 * NOT active (the frame has real allocated borders): only DC needs special
 * handling; other cases are elided from this view. */
962 static av_always_inline
963 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
965     if (mode == DC_PRED8x8) {
966         return check_dc_pred8x8_mode(mode, mb_x, mb_y);
/* Edge-mode fixup for 16x16/8x8 intra prediction with CODEC_FLAG_EMU_EDGE:
 * missing neighbors are simulated with the DC_127/DC_129 constant modes
 * instead of real border pixels. */
972 static av_always_inline
973 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
977         return check_dc_pred8x8_mode(mode, mb_x, mb_y);
// VERT without a top row predicts from the constant 127 edge.
979         return !mb_y ? DC_127_PRED8x8 : mode;
// HOR without a left column predicts from the constant 129 edge.
981         return !mb_x ? DC_129_PRED8x8 : mode;
982     case PLANE_PRED8x8 /*TM*/:
983         return check_tm_pred8x8_mode(mode, mb_x, mb_y);
/* 4x4 analogue of check_tm_pred8x8_mode: degrade TM at picture edges to
 * VERT / HOR / DC_129. */
988 static av_always_inline
989 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
992         return mb_y ? VERT_VP8_PRED : DC_129_PRED;
994         return mb_y ? mode : HOR_VP8_PRED;
/* Edge-mode fixup for 4x4 intra prediction with CODEC_FLAG_EMU_EDGE.
 * Sets *copy_buf when the predictor must run on a small stack copy with
 * synthesized border pixels instead of the real (missing) frame edge. */
998 static av_always_inline
999 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
1003         if (!mb_x && mb_y) {
// Down-left-ish modes only need the top row; without it use DC_127.
1008     case DIAG_DOWN_LEFT_PRED:
1009     case VERT_LEFT_PRED:
1010         return !mb_y ? DC_127_PRED : mode;
1018         return !mb_x ? DC_129_PRED : mode;
1020         return check_tm_pred4x4_mode(mode, mb_x, mb_y);
1021     case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
1022     case DIAG_DOWN_RIGHT_PRED:
1023     case VERT_RIGHT_PRED:
/**
 * Perform intra prediction for one macroblock: 16x16 luma prediction or the
 * per-subblock 4x4 path (with emulated-edge handling and topright
 * management), add the IDCT residuals for luma subblocks, then 8x8 chroma
 * prediction. Borders are exchanged before/after so prediction sees
 * unfiltered neighbor pixels.
 */
1032 static av_always_inline
1033 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1034                    VP8Macroblock *mb, int mb_x, int mb_y)
1036     AVCodecContext *avctx = s->avctx;
1037     int x, y, mode, nnz;
1040     // for the first row, we need to run xchg_mb_border to init the top edge to 127
1041     // otherwise, skip it if we aren't going to deblock
1042     if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1043         xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1044                        s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1045                        s->filter.simple, 1);
1047     if (mb->mode < MODE_I4x4) {
// 16x16 whole-MB luma prediction, with mode fixed up for frame edges.
1048         if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
1049             mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
1051             mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
1053         s->hpc.pred16x16[mode](dst[0], s->linesize);
// I4x4 path: predict and reconstruct each 4x4 subblock in raster order.
1055         uint8_t *ptr = dst[0];
1056         uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1057         uint8_t tr_top[4] = { 127, 127, 127, 127 };
1059         // all blocks on the right edge of the macroblock use bottom edge
1060         // the top macroblock for their topright edge
1061         uint8_t *tr_right = ptr - s->linesize + 16;
1063         // if we're on the right edge of the frame, said edge is extended
1064         // from the top macroblock
1065         if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
1066             mb_x == s->mb_width-1) {
// Replicate the last available topright pixel across the 4-byte edge.
1067             tr = tr_right[-1]*0x01010101u;
1068             tr_right = (uint8_t *)&tr;
1072             AV_ZERO128(td->non_zero_count_cache);
1074         for (y = 0; y < 4; y++) {
1075             uint8_t *topright = ptr + 4 - s->linesize;
1076             for (x = 0; x < 4; x++) {
1077                 int copy = 0, linesize = s->linesize;
1078                 uint8_t *dst = ptr+4*x;
// Scratch block used when the real edge pixels don't exist: 5 rows of 8,
// prediction area starts at offset 12.
1079                 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
1081                 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
1084                     topright = tr_right;
1086                 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
1087                     mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
// copy!=0: build a bordered copy of the source block, predict into it,
// then copy the 4x4 result back (AV_COPY32s below).
1089                         dst = copy_dst + 12;
// Top border: constant 127 when there is no row above, else real pixels.
1093                             AV_WN32A(copy_dst+4, 127U * 0x01010101U);
1095                             AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
1099                                 copy_dst[3] = ptr[4*x-s->linesize-1];
// Left border: constant 129 when there is no column to the left.
1106                             copy_dst[35] = 129U;
1108                         copy_dst[11] = ptr[4*x              -1];
1109                         copy_dst[19] = ptr[4*x+s->linesize  -1];
1110                         copy_dst[27] = ptr[4*x+s->linesize*2-1];
1111                         copy_dst[35] = ptr[4*x+s->linesize*3-1];
1117                 s->hpc.pred4x4[mode](dst, topright, linesize);
1119                     AV_COPY32(ptr+4*x              , copy_dst+12);
1120                     AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
1121                     AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
1122                     AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
// Add the residual: DC-only fast path vs full 4x4 IDCT.
1125                 nnz = td->non_zero_count_cache[y][x];
1128                         s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
1130                         s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
1135             ptr   += 4*s->linesize;
// Chroma 8x8 prediction, same mode applied to both Cb and Cr planes.
1140     if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1141         mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
1143         mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
1145     s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1146     s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
// Second exchange restores the saved border (xchg=0 copies back).
1148     if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1149         xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1150                        s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1151                        s->filter.simple, 0);
/* Subpel filter edge requirements, indexed by the 3-bit subpel phase:
 * row 0 doubles as the MC function-pointer index. */
1154 static const uint8_t subpel_idx[3][8] = {
1155     { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1156                                 // also function pointer index
1157     { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1158     { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
 * Luma motion compensation for one block, including frame-thread progress
 * waiting and edge emulation when the (subpel-extended) source area leaves
 * the reference picture.
1164  * @param s VP8 decoding context
1165  * @param dst target buffer for block data at block position
1166  * @param ref reference picture buffer at origin (0, 0)
1167  * @param mv motion vector (relative to block position) to get pixel data from
1168  * @param x_off horizontal position of block from origin (0, 0)
1169  * @param y_off vertical position of block from origin (0, 0)
1170  * @param block_w width of block (16, 8 or 4)
1171  * @param block_h height of block (always same as block_w)
1172  * @param width width of src/dst plane data
1173  * @param height height of src/dst plane data
1174  * @param linesize size of a single line of plane data, including padding
1175  * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1177 static av_always_inline
1178 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1179                  ThreadFrame *ref, const VP56mv *mv,
1180                  int x_off, int y_off, int block_w, int block_h,
1181                  int width, int height, ptrdiff_t linesize,
1182                  vp8_mc_func mc_func[3][3])
1184     uint8_t *src = ref->f->data[0];
// Luma MVs are in quarter-pel; low 3 bits (after <<1) select the subpel
// filter phase, the rest is the integer offset.
1188         int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1189         int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
1191         x_off += mv->x >> 2;
1192         y_off += mv->y >> 2;
// Frame threading: wait until the reference rows we read are decoded.
1195         ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1196         src += y_off * linesize + x_off;
1197         if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1198             y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
// Source (plus filter margins) leaves the picture: run MC on an
// edge-emulated copy instead.
1199             s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1200                                      src - my_idx * linesize - mx_idx,
1202                                      block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1203                                      x_off - mx_idx, y_off - my_idx, width, height);
1204             src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1206         mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
// Full-pel MV: plain copy path (branch head elided from this view).
1208         ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1209         mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
1214 * chroma MC function
1216 * @param s VP8 decoding context
1217 * @param dst1 target buffer for block data at block position (U plane)
1218 * @param dst2 target buffer for block data at block position (V plane)
1219 * @param ref reference picture buffer at origin (0, 0)
1220 * @param mv motion vector (relative to block position) to get pixel data from
1221 * @param x_off horizontal position of block from origin (0, 0)
1222 * @param y_off vertical position of block from origin (0, 0)
1223 * @param block_w width of block (16, 8 or 4)
1224 * @param block_h height of block (always same as block_w)
1225 * @param width width of src/dst plane data
1226 * @param height height of src/dst plane data
1227 * @param linesize size of a single line of plane data, including padding
1228 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
/* Chroma motion compensation for both U and V planes (parameters
 * documented in the block above).  Same structure as vp8_mc_luma but
 * chroma MVs are 1/8-pel, so the subpel phase is mv&7 and the integer
 * part mv>>3; progress is tracked with >>3 since chroma rows are 8px. */
1230 static av_always_inline
1231 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
1232 ThreadFrame *ref, const VP56mv *mv, int x_off, int y_off,
1233 int block_w, int block_h, int width, int height, ptrdiff_t linesize,
1234 vp8_mc_func mc_func[3][3])
1236 uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1239 int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1240 int my = mv->y&7, my_idx = subpel_idx[0][my];
1242 x_off += mv->x >> 3;
1243 y_off += mv->y >> 3;
1246 src1 += y_off * linesize + x_off;
1247 src2 += y_off * linesize + x_off;
1248 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
/* Edge case: filter footprint overlaps the plane border — emulate the
 * edge separately for U and V, reusing the same scratch buffer. */
1249 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1250 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1251 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1252 src1 - my_idx * linesize - mx_idx,
1254 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1255 x_off - mx_idx, y_off - my_idx, width, height);
1256 src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1257 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1259 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1260 src2 - my_idx * linesize - mx_idx,
1262 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1263 x_off - mx_idx, y_off - my_idx, width, height);
1264 src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1265 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1267 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1268 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
/* Full-pel MV path: direct copy for both chroma planes. */
1271 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1272 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1273 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
/* Motion-compensate one luma partition plus the corresponding chroma
 * area.  (bx_off, by_off) is the partition offset inside the macroblock;
 * the chroma MV is derived from the luma MV (elided lines presumably
 * compute `uvmv` — NOTE(review): confirm against full source). */
1277 static av_always_inline
1278 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1279 ThreadFrame *ref_frame, int x_off, int y_off,
1280 int bx_off, int by_off,
1281 int block_w, int block_h,
1282 int width, int height, VP56mv *mv)
/* Y: pick the 16x16 or 8x8 put_pixels table based on partition width. */
1287 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1288 ref_frame, mv, x_off + bx_off, y_off + by_off,
1289 block_w, block_h, width, height, s->linesize,
1290 s->put_pixels_tab[block_w == 8]);
/* Profile 3 uses full-pel chroma MVs (handled in elided branch). */
1293 if (s->profile == 3) {
/* U/V: all geometry is halved relative to luma (4:2:0 subsampling). */
1297 x_off >>= 1; y_off >>= 1;
1298 bx_off >>= 1; by_off >>= 1;
1299 width >>= 1; height >>= 1;
1300 block_w >>= 1; block_h >>= 1;
1301 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1302 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1303 &uvmv, x_off + bx_off, y_off + by_off,
1304 block_w, block_h, width, height, s->uvlinesize,
1305 s->put_pixels_tab[1 + (block_w == 4)]);
1308 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1309 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1310 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1312 /* Don't prefetch refs that haven't been used very often this frame. */
1313 if (s->ref_count[ref-1] > (mb_xy >> 5)) {
1314 int x_off = mb_x << 4, y_off = mb_y << 4;
1315 int mx = (mb->mv.x>>2) + x_off + 8;
1316 int my = (mb->mv.y>>2) + y_off;
1317 uint8_t **src= s->framep[ref]->tf.f->data;
/* +64 skews the address one cache line ahead of the estimated source. */
1318 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1319 /* For threading, a ff_thread_await_progress here might be useful, but
1320 * it actually slows down the decoder. Since a bad prefetch doesn't
1321 * generate bad decoder output, we don't run it here. */
1322 s->vdsp.prefetch(src[0]+off, s->linesize, 4);
/* Chroma: halved coordinates; U and V are assumed contiguous, hence the
 * src[2]-src[1] stride trick prefetching both planes in one call. */
1323 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1324 s->vdsp.prefetch(src[1]+off, src[2]-src[1], 2);
1329 * Apply motion vectors to prediction buffer, chapter 18.
/* Apply motion compensation for one macroblock, dispatching on the MV
 * partitioning mode (none, 4x4, 16x8, 8x16, 8x8) — VP8 spec chapter 18. */
1331 static av_always_inline
1332 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1333 VP8Macroblock *mb, int mb_x, int mb_y)
1335 int x_off = mb_x << 4, y_off = mb_y << 4;
1336 int width = 16*s->mb_width, height = 16*s->mb_height;
1337 ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1338 VP56mv *bmv = mb->bmv;
1340 switch (mb->partitioning) {
1341 case VP8_SPLITMVMODE_NONE:
/* Whole-MB prediction with a single MV. */
1342 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1343 0, 0, 16, 16, width, height, &mb->mv);
1345 case VP8_SPLITMVMODE_4x4: {
/* Sixteen independent 4x4 luma blocks, each with its own MV. */
1350 for (y = 0; y < 4; y++) {
1351 for (x = 0; x < 4; x++) {
1352 vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
1354 4*x + x_off, 4*y + y_off, 4, 4,
1355 width, height, s->linesize,
1356 s->put_pixels_tab[2]);
/* Chroma: each 4x4 chroma block uses the rounded average of the four
 * co-located luma MVs. */
1361 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1362 for (y = 0; y < 2; y++) {
1363 for (x = 0; x < 2; x++) {
1364 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1365 mb->bmv[ 2*y * 4 + 2*x+1].x +
1366 mb->bmv[(2*y+1) * 4 + 2*x ].x +
1367 mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1368 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1369 mb->bmv[ 2*y * 4 + 2*x+1].y +
1370 mb->bmv[(2*y+1) * 4 + 2*x ].y +
1371 mb->bmv[(2*y+1) * 4 + 2*x+1].y;
/* Round-to-nearest divide by 4 that also works for negative sums
 * (the sign bit supplies the correction term). */
1372 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1373 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
1374 if (s->profile == 3) {
1378 vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
1379 dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1380 4*x + x_off, 4*y + y_off, 4, 4,
1381 width, height, s->uvlinesize,
1382 s->put_pixels_tab[2]);
1387 case VP8_SPLITMVMODE_16x8:
1388 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1389 0, 0, 16, 8, width, height, &bmv[0]);
1390 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1391 0, 8, 16, 8, width, height, &bmv[1]);
1393 case VP8_SPLITMVMODE_8x16:
1394 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1395 0, 0, 8, 16, width, height, &bmv[0]);
1396 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1397 8, 0, 8, 16, width, height, &bmv[1]);
1399 case VP8_SPLITMVMODE_8x8:
1400 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1401 0, 0, 8, 8, width, height, &bmv[0]);
1402 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1403 8, 0, 8, 8, width, height, &bmv[1]);
1404 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1405 0, 8, 8, 8, width, height, &bmv[2]);
1406 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1407 8, 8, 8, 8, width, height, &bmv[3]);
/* Add the inverse-transformed residual to the prediction for one
 * macroblock.  non_zero_count_cache packs per-4x4-block nnz values as
 * bytes, so an AV_RL32 load inspects a whole row of four blocks at once:
 * nnz==1 means DC-only (cheap idct_dc_add), nnz>1 full idct_add. */
1412 static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
1413 uint8_t *dst[3], VP8Macroblock *mb)
1417 if (mb->mode != MODE_I4x4) {
1418 uint8_t *y_dst = dst[0];
1419 for (y = 0; y < 4; y++) {
1420 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
/* Mixed row (some blocks have AC coeffs): handle block by block,
 * consuming one byte of nnz4 per block (shift elided in this view). */
1422 if (nnz4&~0x01010101) {
1423 for (x = 0; x < 4; x++) {
1424 if ((uint8_t)nnz4 == 1)
1425 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
1426 else if((uint8_t)nnz4 > 1)
1427 s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
/* DC-only row: one call handles all four blocks. */
1433 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1436 y_dst += 4*s->linesize;
/* Chroma: same scheme for U (ch==0) and V (ch==1), 2x2 blocks each. */
1440 for (ch = 0; ch < 2; ch++) {
1441 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
1443 uint8_t *ch_dst = dst[1+ch];
1444 if (nnz4&~0x01010101) {
1445 for (y = 0; y < 2; y++) {
1446 for (x = 0; x < 2; x++) {
1447 if ((uint8_t)nnz4 == 1)
1448 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1449 else if((uint8_t)nnz4 > 1)
1450 s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1453 goto chroma_idct_end;
1455 ch_dst += 4*s->uvlinesize;
1458 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
/* Compute the loop-filter strength for one macroblock (VP8 spec 15.2/15.3):
 * base level from segmentation or the frame-wide filter level, adjusted by
 * per-reference/per-mode deltas, then clamped; the interior limit is
 * derived from the level and the sharpness setting. */
1465 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
1467 int interior_limit, filter_level;
1469 if (s->segmentation.enabled) {
1470 filter_level = s->segmentation.filter_level[mb->segment];
/* Segment value is a delta unless absolute_vals is set. */
1471 if (!s->segmentation.absolute_vals)
1472 filter_level += s->filter.level;
1474 filter_level = s->filter.level;
1476 if (s->lf_delta.enabled) {
1477 filter_level += s->lf_delta.ref[mb->ref_frame];
1478 filter_level += s->lf_delta.mode[mb->mode];
/* Clamp to the valid 6-bit range [0, 63]. */
1481 filter_level = av_clip_uintp2(filter_level, 6);
1483 interior_limit = filter_level;
1484 if (s->filter.sharpness) {
1485 interior_limit >>= (s->filter.sharpness + 3) >> 2;
1486 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1488 interior_limit = FFMAX(interior_limit, 1);
1490 f->filter_level = filter_level;
1491 f->inner_limit = interior_limit;
/* Inner (sub-block) edges are filtered unless the MB is skipped with a
 * whole-block mode — I4x4 and split-MV MBs have real inner edges. */
1492 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
/* Normal (non-simple) in-loop deblocking for one macroblock: filters the
 * left/top MB edges with the stronger mbedge limit and the interior 4-pel
 * edges with the weaker bedge limit, on luma and both chroma planes.
 * hev_thresh (high-edge-variance) comes from a keyframe-dependent LUT. */
1495 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
1497 int mbedge_lim, bedge_lim, hev_thresh;
1498 int filter_level = f->filter_level;
1499 int inner_limit = f->inner_limit;
1500 int inner_filter = f->inner_filter;
1501 int linesize = s->linesize;
1502 int uvlinesize = s->uvlinesize;
1503 static const uint8_t hev_thresh_lut[2][64] = {
1504 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1505 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1506 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1508 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1509 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1510 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* MB-edge limit is 4 stronger than the block-edge limit (spec 15.2). */
1517 bedge_lim = 2*filter_level + inner_limit;
1518 mbedge_lim = bedge_lim + 4;
1520 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
/* Vertical MB-left edge (guarded by elided mb_x check). */
1523 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
1524 mbedge_lim, inner_limit, hev_thresh);
1525 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
1526 mbedge_lim, inner_limit, hev_thresh);
/* Interior vertical edges at x = 4, 8, 12 (luma) and x = 4 (chroma). */
1530 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
1531 inner_limit, hev_thresh);
1532 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
1533 inner_limit, hev_thresh);
1534 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
1535 inner_limit, hev_thresh);
1536 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
1537 uvlinesize, bedge_lim,
1538 inner_limit, hev_thresh);
/* Horizontal MB-top edge (guarded by elided mb_y check). */
1542 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
1543 mbedge_lim, inner_limit, hev_thresh);
1544 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
1545 mbedge_lim, inner_limit, hev_thresh);
/* Interior horizontal edges at y = 4, 8, 12 (luma) and y = 4 (chroma). */
1549 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
1550 linesize, bedge_lim,
1551 inner_limit, hev_thresh);
1552 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
1553 linesize, bedge_lim,
1554 inner_limit, hev_thresh);
1555 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
1556 linesize, bedge_lim,
1557 inner_limit, hev_thresh);
1558 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
1559 dst[2] + 4 * uvlinesize,
1560 uvlinesize, bedge_lim,
1561 inner_limit, hev_thresh);
/* Simple-profile deblocking: luma only, no high-edge-variance logic.
 * Same mbedge/bedge limit derivation as filter_mb. */
1565 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
1567 int mbedge_lim, bedge_lim;
1568 int filter_level = f->filter_level;
1569 int inner_limit = f->inner_limit;
1570 int inner_filter = f->inner_filter;
1571 int linesize = s->linesize;
1576 bedge_lim = 2*filter_level + inner_limit;
1577 mbedge_lim = bedge_lim + 4;
/* Vertical MB-left edge, then interior vertical edges at x = 4, 8, 12. */
1580 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
1582 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1583 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1584 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
/* Horizontal MB-top edge, then interior horizontal edges. */
1588 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
1590 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1591 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1592 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
1596 #define MARGIN (16 << 2)
/* Pre-pass used with frame threading (mb_layout == 1): decode MB modes and
 * MVs for the whole frame before residual decoding, so MV clamping bounds
 * and the segmentation map are available up front. */
1597 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
1598 VP8Frame *prev_frame)
1600 VP8Context *s = avctx->priv_data;
/* MVs may point up to MARGIN (1/8-pel units) outside the frame. */
1603 s->mv_min.y = -MARGIN;
1604 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1605 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
1606 VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1607 int mb_xy = mb_y*s->mb_width;
/* Reset left intra prediction context at each row start. */
1609 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1611 s->mv_min.x = -MARGIN;
1612 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1613 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
/* Seed the top prediction context (row above) with DC_PRED. */
1615 AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
1616 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
1617 prev_frame && prev_frame->seg_map ?
1618 prev_frame->seg_map->data + mb_xy : NULL, 1);
/* Sliced-threading helpers.  A decode position is packed into one int as
 * (mb_y << 16) | mb_x so positions compare row-major with a single test.
 * check_thread_pos blocks until thread `otd` has advanced past the given
 * position; update_pos publishes this thread's position and wakes any
 * neighbour threads waiting on it. */
1628 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
1630 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);\
1631 if (otd->thread_mb_pos < tmp) {\
1632 pthread_mutex_lock(&otd->lock);\
1633 td->wait_mb_pos = tmp;\
1635 if (otd->thread_mb_pos >= tmp)\
1637 pthread_cond_wait(&otd->cond, &otd->lock);\
1639 td->wait_mb_pos = INT_MAX;\
1640 pthread_mutex_unlock(&otd->lock);\
1644 #define update_pos(td, mb_y, mb_x)\
1646 int pos = (mb_y << 16) | (mb_x & 0xFFFF);\
1647 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
1648 int is_null = (next_td == NULL) || (prev_td == NULL);\
1649 int pos_check = (is_null) ? 1 :\
1650 (next_td != td && pos >= next_td->wait_mb_pos) ||\
1651 (prev_td != td && pos >= prev_td->wait_mb_pos);\
1652 td->thread_mb_pos = pos;\
1653 if (sliced_threading && pos_check) {\
1654 pthread_mutex_lock(&td->lock);\
1655 pthread_cond_broadcast(&td->cond);\
1656 pthread_mutex_unlock(&td->lock);\
/* Single-threaded fallback: both macros compile to nothing. */
1660 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
1661 #define update_pos(td, mb_y, mb_x)
/* Decode one macroblock row (mode/MV + coefficients + prediction + IDCT),
 * without loop filtering.  With sliced threading, synchronises against
 * the threads handling the rows above and below via
 * check_thread_pos/update_pos. */
1664 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
1665 int jobnr, int threadnr)
1667 VP8Context *s = avctx->priv_data;
1668 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
1669 int mb_y = td->thread_mb_pos>>16;
1670 int i, y, mb_x, mb_xy = mb_y*s->mb_width;
1671 int num_jobs = s->num_jobs;
1672 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
/* Coefficient partitions are cycled per row (count is a power of two). */
1673 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
1676 curframe->tf.f->data[0] + 16*mb_y*s->linesize,
1677 curframe->tf.f->data[1] + 8*mb_y*s->uvlinesize,
1678 curframe->tf.f->data[2] + 8*mb_y*s->uvlinesize
/* Neighbour threads: row above/below wrap around the job ring. */
1680 if (mb_y == 0) prev_td = td;
1681 else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1682 if (mb_y == s->mb_height-1) next_td = td;
1683 else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
1684 if (s->mb_layout == 1)
1685 mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1687 // Make sure the previous frame has read its segmentation map,
1688 // if we re-use the same map.
1689 if (prev_frame && s->segmentation.enabled &&
1690 !s->segmentation.update_map)
1691 ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
1692 mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1693 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
1694 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1697 memset(td->left_nnz, 0, sizeof(td->left_nnz));
1698 // left edge of 129 for intra prediction
1699 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1700 for (i = 0; i < 3; i++)
1701 for (y = 0; y < 16>>!!i; y++)
1702 dst[i][y*curframe->tf.f->linesize[i]-1] = 129;
1704 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
1708 s->mv_min.x = -MARGIN;
1709 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1711 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1712 // Wait for previous thread to read mb_x+2, and reach mb_y-1.
1713 if (prev_td != td) {
1714 if (threadnr != 0) {
1715 check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
1717 check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
1721 s->vdsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1722 s->vdsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1725 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
1726 prev_frame && prev_frame->seg_map ?
1727 prev_frame->seg_map->data + mb_xy : NULL, 0);
1729 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
1732 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);
/* Intra vs inter prediction, then residual add. */
1734 if (mb->mode <= MODE_I4x4)
1735 intra_predict(s, td, dst, mb, mb_x, mb_y);
1737 inter_predict(s, td, dst, mb, mb_x, mb_y);
1739 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
1742 idct_mb(s, td, dst, mb);
1744 AV_ZERO64(td->left_nnz);
1745 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
1747 // Reset DC block predictors if they would exist if the mb had coefficients
1748 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
1749 td->left_nnz[8] = 0;
1750 s->top_nnz[mb_x][8] = 0;
1754 if (s->deblock_filter)
1755 filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);
/* With multiple jobs, the last thread backs up the border pixels the
 * filter pass (in another thread) will need for intra prediction. */
1757 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
1758 if (s->filter.simple)
1759 backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1761 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1764 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
1772 if (mb_x == s->mb_width+1) {
1773 update_pos(td, mb_y, s->mb_width+3);
1775 update_pos(td, mb_y, mb_x);
/* Loop-filter one macroblock row using the strengths computed during
 * decode; with sliced threading, offset by s->mb_width+3 positions behind
 * the decode pass so it never filters undecoded data. */
1780 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
1781 int jobnr, int threadnr)
1783 VP8Context *s = avctx->priv_data;
1784 VP8ThreadData *td = &s->thread_data[threadnr];
1785 int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
1786 AVFrame *curframe = s->curframe->tf.f;
1788 VP8ThreadData *prev_td, *next_td;
1790 curframe->data[0] + 16*mb_y*s->linesize,
1791 curframe->data[1] + 8*mb_y*s->uvlinesize,
1792 curframe->data[2] + 8*mb_y*s->uvlinesize
1795 if (s->mb_layout == 1)
1796 mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1798 mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1800 if (mb_y == 0) prev_td = td;
1801 else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1802 if (mb_y == s->mb_height-1) next_td = td;
1803 else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
1805 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
1806 VP8FilterStrength *f = &td->filter_strength[mb_x];
/* Wait for the row above to be filtered and the row below decoded. */
1807 if (prev_td != td) {
1808 check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
1811 if (next_td != &s->thread_data[0]) {
1812 check_thread_pos(td, next_td, mb_x+1, mb_y+1);
/* Single-job mode: border backup happens here instead of decode pass. */
1815 if (num_jobs == 1) {
1816 if (s->filter.simple)
1817 backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1819 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1822 if (s->filter.simple)
1823 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
1825 filter_mb(s, dst, f, mb_x, mb_y);
1830 update_pos(td, mb_y, (s->mb_width+3) + mb_x);
/* execute2() worker: each job decodes (and optionally filters) every
 * num_jobs-th macroblock row, reporting per-row progress for frame
 * threading. */
1834 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
1835 int jobnr, int threadnr)
1837 VP8Context *s = avctx->priv_data;
1838 VP8ThreadData *td = &s->thread_data[jobnr];
1839 VP8ThreadData *next_td = NULL, *prev_td = NULL;
1840 VP8Frame *curframe = s->curframe;
1841 int mb_y, num_jobs = s->num_jobs;
1842 td->thread_nr = threadnr;
1843 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
1844 if (mb_y >= s->mb_height) break;
/* Publish the row being worked on (packed into the high 16 bits). */
1845 td->thread_mb_pos = mb_y<<16;
1846 vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
1847 if (s->deblock_filter)
1848 vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
/* Mark this row fully done so waiting neighbour threads can proceed. */
1849 update_pos(td, mb_y, INT_MAX & 0xFFFF);
1854 if (avctx->active_thread_type == FF_THREAD_FRAME)
1855 ff_thread_report_progress(&curframe->tf, mb_y, 0);
/* Top-level per-packet entry point: parse the frame header, manage the
 * reference-frame pool (last/golden/altref), decode all macroblock rows
 * via execute2(), and output the frame unless it is invisible.
 * Returns a negative AVERROR on failure. */
1861 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
1864 VP8Context *s = avctx->priv_data;
1865 int ret, i, referenced, num_jobs;
1866 enum AVDiscard skip_thresh;
1867 VP8Frame *av_uninit(curframe), *prev_frame;
1869 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
1872 prev_frame = s->framep[VP56_FRAME_CURRENT];
/* A frame is "referenced" if any future frame can predict from it. */
1874 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
1875 || s->update_altref == VP56_FRAME_CURRENT;
1877 skip_thresh = !referenced ? AVDISCARD_NONREF :
1878 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
1880 if (avctx->skip_frame >= skip_thresh) {
1882 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
1885 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
1887 // release no longer referenced frames
1888 for (i = 0; i < 5; i++)
1889 if (s->frames[i].tf.f->data[0] &&
1890 &s->frames[i] != prev_frame &&
1891 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1892 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1893 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1894 vp8_release_frame(s, &s->frames[i]);
1896 // find a free buffer
1897 for (i = 0; i < 5; i++)
1898 if (&s->frames[i] != prev_frame &&
1899 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1900 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1901 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1902 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
/* 5 frames with at most 4 distinct references: a free slot must exist. */
1906 av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
1909 if (curframe->tf.f->data[0])
1910 vp8_release_frame(s, curframe);
1912 // Given that arithmetic probabilities are updated every frame, it's quite likely
1913 // that the values we have on a random interframe are complete junk if we didn't
1914 // start decode on a keyframe. So just don't display anything rather than junk.
1915 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1916 !s->framep[VP56_FRAME_GOLDEN] ||
1917 !s->framep[VP56_FRAME_GOLDEN2])) {
1918 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1919 ret = AVERROR_INVALIDDATA;
1923 curframe->tf.f->key_frame = s->keyframe;
1924 curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1925 if ((ret = vp8_alloc_frame(s, curframe, referenced))) {
1926 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
1930 // check if golden and altref are swapped
1931 if (s->update_altref != VP56_FRAME_NONE) {
1932 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
1934 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
1936 if (s->update_golden != VP56_FRAME_NONE) {
1937 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
1939 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
1941 if (s->update_last) {
1942 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
1944 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
1946 s->next_framep[VP56_FRAME_CURRENT] = curframe;
/* Reference bookkeeping is done: frame threads may start the next frame. */
1948 ff_thread_finish_setup(avctx);
1950 s->linesize = curframe->tf.f->linesize[0];
1951 s->uvlinesize = curframe->tf.f->linesize[1];
/* Lazily allocate per-thread edge emulation scratch (21 lines covers a
 * 16px block plus the 5 extra lines the sixtap filter needs). */
1953 if (!s->thread_data[0].edge_emu_buffer)
1954 for (i = 0; i < MAX_THREADS; i++)
1955 s->thread_data[i].edge_emu_buffer = av_malloc(21*s->linesize);
1957 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1958 /* Zero macroblock structures for top/top-left prediction from outside the frame. */
1960 memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
1961 if (!s->mb_layout && s->keyframe)
1962 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
1964 // top edge of 127 for intra prediction
1965 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1966 s->top_border[0][15] = s->top_border[0][23] = 127;
1967 s->top_border[0][31] = 127;
1968 memset(s->top_border[1], 127, s->mb_width*sizeof(*s->top_border));
1970 memset(s->ref_count, 0, sizeof(s->ref_count));
/* Frame-threading layout: run the MV/mode pre-pass for the whole frame. */
1973 if (s->mb_layout == 1) {
1974 // Make sure the previous frame has read its segmentation map,
1975 // if we re-use the same map.
1976 if (prev_frame && s->segmentation.enabled &&
1977 !s->segmentation.update_map)
1978 ff_thread_await_progress(&prev_frame->tf, 1, 0);
1979 vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
1982 if (avctx->active_thread_type == FF_THREAD_FRAME)
/* Cannot use more jobs than coefficient partitions. */
1985 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
1986 s->num_jobs = num_jobs;
1987 s->curframe = curframe;
1988 s->prev_frame = prev_frame;
1989 s->mv_min.y = -MARGIN;
1990 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1991 for (i = 0; i < MAX_THREADS; i++) {
1992 s->thread_data[i].thread_mb_pos = 0;
1993 s->thread_data[i].wait_mb_pos = INT_MAX;
1995 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);
1997 ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
1998 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2001 // if future frames don't use the updated probabilities,
2002 // reset them to the values we saved
2003 if (!s->update_probabilities)
2004 s->prob[0] = s->prob[1];
/* Invisible frames update references but produce no output picture. */
2006 if (!s->invisible) {
2007 if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
2014 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
/* Codec close: flush/free all decoder buffers, then free the AVFrames
 * backing the reference-frame pool. */
2018 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2020 VP8Context *s = avctx->priv_data;
2023 vp8_decode_flush_impl(avctx, 1);
2024 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2025 av_frame_free(&s->frames[i].tf.f);
/* Allocate one AVFrame per slot of the reference-frame pool.
 * Returns AVERROR(ENOMEM) on allocation failure (caller cleans up). */
2030 static av_cold int vp8_init_frames(VP8Context *s)
2033 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2034 s->frames[i].tf.f = av_frame_alloc();
2035 if (!s->frames[i].tf.f)
2036 return AVERROR(ENOMEM);
/* Codec init: set pixel format, initialise DSP/prediction contexts and
 * the frame pool.  On frame-pool failure, tears down via
 * ff_vp8_decode_free. */
2042 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2043 VP8Context *s = avctx->priv_data;
2047 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2048 avctx->internal->allocate_progress = 1;
2050 ff_videodsp_init(&s->vdsp, 8);
2051 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2052 ff_vp8dsp_init(&s->vp8dsp);
2054 if ((ret = vp8_init_frames(s)) < 0) {
2055 ff_vp8_decode_free(avctx);
/* Frame-threading copy init: each worker context only needs its own
 * frame pool; the rest is synced via update_thread_context. */
2062 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2064 VP8Context *s = avctx->priv_data;
2069 if ((ret = vp8_init_frames(s)) < 0) {
2070 ff_vp8_decode_free(avctx);
/* Translate a frame pointer from the source context's pool into the
 * corresponding slot of this context's pool (NULL stays NULL). */
2077 #define REBASE(pic) \
2078 pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
/* Frame-threading sync: copy decoder state (probabilities, segmentation,
 * loop-filter deltas, sign biases) and re-reference the source context's
 * frames into this context, rebasing the framep pointers. */
2080 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
2082 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
/* Dimensions changed: drop stale per-size buffers before adopting. */
2085 if (s->macroblocks_base &&
2086 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2088 s->mb_width = s_src->mb_width;
2089 s->mb_height = s_src->mb_height;
/* Pick whichever probability set the source will carry forward. */
2092 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2093 s->segmentation = s_src->segmentation;
2094 s->lf_delta = s_src->lf_delta;
2095 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2097 for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2098 if (s_src->frames[i].tf.f->data[0]) {
2099 int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
2105 s->framep[0] = REBASE(s_src->next_framep[0]);
2106 s->framep[1] = REBASE(s_src->next_framep[1]);
2107 s->framep[2] = REBASE(s_src->next_framep[2]);
2108 s->framep[3] = REBASE(s_src->next_framep[3]);
2113 AVCodec ff_vp8_decoder = {
2115 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2116 .type = AVMEDIA_TYPE_VIDEO,
2117 .id = AV_CODEC_ID_VP8,
2118 .priv_data_size = sizeof(VP8Context),
2119 .init = ff_vp8_decode_init,
2120 .close = ff_vp8_decode_free,
2121 .decode = ff_vp8_decode_frame,
2122 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2123 .flush = vp8_decode_flush,
2124 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2125 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),