/*
 * VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Jason Garrett-Glaser
 * Copyright (C) 2012 Daniel Kang
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "libavutil/imgutils.h"
#include "avcodec.h"
#include "internal.h"
#include "vp8.h"
#include "vp8data.h"
#include "rectangle.h"
#include "thread.h"
static void free_buffers(VP8Context *s)
{
    int i;
    if (s->thread_data)
        for (i = 0; i < MAX_THREADS; i++) {
#if HAVE_THREADS
            pthread_cond_destroy(&s->thread_data[i].cond);
            pthread_mutex_destroy(&s->thread_data[i].lock);
#endif
            av_freep(&s->thread_data[i].filter_strength);
        }
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}
static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    int ret;
    if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                    ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
        return ret;
    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
        ff_thread_release_buffer(s->avctx, &f->tf);
        return AVERROR(ENOMEM);
    }
    return 0;
}
static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
}
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;
    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }

    return 0;
}
static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(s, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem)
        free_buffers(s);
}
static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0);
}
static int update_dimensions(VP8Context *s, int width, int height)
{
    AVCodecContext *avctx = s->avctx;
    int i, ret;

    if (width  != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
        height != s->avctx->height) {
        vp8_decode_flush_impl(s->avctx, 1);

        ret = ff_set_dimensions(s->avctx, width, height);
        if (ret < 0)
            return ret;
    }

    s->mb_width  = (s->avctx->coded_width +15) / 16;
    s->mb_height = (s->avctx->coded_height+15) / 16;

    s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
    }
    else // Sliced threading
        s->macroblocks_base = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
    s->top_nnz     = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
    s->top_border  = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
    s->thread_data = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
#if HAVE_THREADS
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
    }

    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        (!s->intra4x4_pred_mode_top && !s->mb_layout))
        return AVERROR(ENOMEM);

    s->macroblocks = s->macroblocks_base + 1;

    return 0;
}
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map = vp8_rac_get(c);

    if (vp8_rac_get(c)) { // update segment feature data
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    buf      += 3*(s->num_coeff_partitions-1);
    buf_size -= 3*(s->num_coeff_partitions-1);
    if (buf_size < 0)
        return -1;
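
    /* The coefficient data is preceded by (num_coeff_partitions - 1) 3-byte
     * little-endian partition sizes; the last partition carries no size field,
     * its length is implied by whatever remains in the buffer. */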
    for (i = 0; i < s->num_coeff_partitions-1; i++) {
        int size = AV_RL24(sizes + 3*i);
        if (buf_size - size < 0)
            return -1;

        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        buf      += size;
        buf_size -= size;
    }
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}
static void get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;
        } else
            base_qi = yac_qi;

        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi             , 7)];
        s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[i].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
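        /* worked example: for an AC quantizer of 100 this gives
         * (101581 * 100) >> 16 = 155, i.e. the factor 155/100 */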
        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];

        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}
/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
 *
 * Intra frames update all 3 references
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
 * If the update (golden|altref) flag is set, it's updated with the current frame
 *      if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 *      0: no update
 *      1: VP56_FRAME_PREVIOUS
 *      2: update golden with altref, or update altref with golden
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;

    if (update)
        return VP56_FRAME_CURRENT;

    switch (vp8_rac_get_uint(c, 2)) {
    case 1:
        return VP56_FRAME_PREVIOUS;
    case 2:
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    }
    return VP56_FRAME_NONE;
}
static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}
static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, i, j, k, l, m, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7*s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf+3) & 0x3fff;
        height = AV_RL16(buf+5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        for (i = 0; i < 4; i++)
            for (j = 0; j < 16; j++)
                memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                       sizeof(s->prob->token[i][j]));
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height || (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) {
        if ((ret = update_dimensions(s, width, height)) < 0)
            return ret;
    }

    get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);

        if (vp8_rac_get(c))
            for (i = 0; i < 4; i++)
                s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
        if (vp8_rac_get(c))
            for (i = 0; i < 3; i++)
                s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);

        // 17.2 MV probability update
        for (i = 0; i < 2; i++)
            for (j = 0; j < 19; j++)
                if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                    s->prob->mvc[i][j] = vp8_rac_get_nn(c);
    }

    return 0;
}
static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
{
    dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
    dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
}
/**
 * Motion vector coding, 17.1.
 */
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int i;

        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = 9; i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
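        /* bit 3 is only coded explicitly when some higher magnitude bit is
         * set; otherwise it must be 1, since magnitudes 0-7 would have been
         * coded with the small tree below */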
        if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p+2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3*bit;
        x  += 4*bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2*bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}
static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
{
    if (left == top)
        return vp8_submv_prob[4-!!left];
    if (!top)
        return vp8_submv_prob[2];
    return vp8_submv_prob[1-!!left];
}
/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
{
    int part_idx;
    int n, num;
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
                  *mbsplits_top,
                  *mbsplits_cur, *firstidx;
    VP56mv *top_mv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width-1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv = top_mb->bmv;

    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        } else {
            part_idx = VP8_SPLITMVMODE_8x8;
        }
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx];
    firstidx = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
                    mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}
static av_always_inline
void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
                                  mb - 1 /* left */,
                                  0      /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    }
    else {
        mb_edge[0] = mb - s->mb_width-1;
        mb_edge[2] = mb - s->mb_width-2;
    }

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
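    /* Each VP56mv packs two int16 components into 32 bits, so the SWAR
     * negate in the macro below flips the sign of both components at once
     * when the edge MB's reference uses the opposite sign bias: ~mv, then
     * +1 in each 16-bit lane with the carry masked so it cannot cross
     * into the neighbouring lane. */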
#define MV_EDGE_CHECK(n)\
{\
    VP8Macroblock *edge = mb_edge[n];\
    int edge_ref = edge->ref_frame;\
    if (edge_ref != VP56_FRAME_CURRENT) {\
        uint32_t mv = AV_RN32A(&edge->mv);\
        if (mv) {\
            if (cur_sign_bias != sign_bias[edge_ref]) {\
                /* SWAR negate of the values in mv. */\
                mv = ~mv;\
                mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
            }\
            if (!n || mv != AV_RN32A(&near_mv[idx]))\
                AV_WN32A(&near_mv[++idx], mv);\
            cnt[idx] += 1 + (n != 2);\
        } else {\
            cnt[CNT_ZERO] += 1 + (n != 2);\
        }\
    }\
}

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {

                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
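                /* cnt[CNT_SPLITMV] now holds the context for the split-MV
                 * probability: SPLIT neighbours to the left/top count
                 * double, the top-left neighbour counts once */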
                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
                } else {
                    mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
{
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

    if (layout == 1) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
    if (keyframe) {
        int x, y;
        uint8_t *top;
        uint8_t* const left = s->intra4x4_pred_mode_left;
        if (layout == 1)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y] = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
    }
}
static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout)
{
    VP56RangeCoder *c = &s->c;

    if (s->segmentation.update_map) {
        int bit  = vp56_rac_get_prob(c, s->prob->segmentid[0]);
        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
    } else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        } else {
            const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
            if (s->mb_layout == 1)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A( s->intra4x4_pred_mode_left, modes);
        }

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
        mb->ref_frame = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
                VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame-1]++;

        // motion vectors, 16.3
        decode_mvs(s, mb, mb_x, mb_y, layout);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
        mb->ref_frame = VP56_FRAME_CURRENT;
        mb->partitioning = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}
#ifndef decode_block_coeffs_internal
/**
 * @param r arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
                                        int i, uint8_t *token_prob, int16_t qmul[2])
{
    VP56RangeCoder c = *r;
    goto skip_eob;
    do {
        int coeff;
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i+1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else {                                             // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else {    // DCT_CAT3 and up
                    int a = vp56_rac_get_prob(&c, token_prob[8]);
                    int b = vp56_rac_get_prob(&c, token_prob[9+a]);
                    int cat = (a<<1) + b;
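                    /* DCT_CAT3..6 code values starting at 11, 19, 35 and 67
                     * (= 3 + (8 << cat)) with 3, 4, 5 and 11 extra magnitude
                     * bits respectively, read via ff_vp8_dct_cat_prob[cat] */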
                    coeff  = 3 + (8<<cat);
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i+1][2];
        }
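        /* the sign bit follows the magnitude; qmul[0] is the DC dequant
         * factor and applies only to coefficient 0, every later coefficient
         * is scaled by the AC factor qmul[1] */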
        block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}
#endif
/**
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
                        int i, int zero_nhood, int16_t qmul[2])
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
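    /* Handling DCT_EOB here lets the common all-zero block return without
     * entering decode_block_coeffs_internal, which works on a local copy of
     * the range coder so it can stay in registers for the whole block. */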
    return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
}
static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
                      uint8_t t_nnz[9], uint8_t l_nnz[9])
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;
    int block_dc = 0;

    if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
                                  s->qmat[segment].luma_dc_qmul);
        l_nnz[8] = t_nnz[8] = !!nnz;
        if (nnz) {
            nnz_total += nnz;
            block_dc = 1;
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
        }
        luma_start = 1;
        luma_ctx = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
                                      nnz_pred, s->qmat[segment].luma_qmul);
            // nnz+block_dc may be one more than the actual last index, but we don't care
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
                nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
                                          nnz_pred, s->qmat[segment].chroma_qmul);
                td->non_zero_count_cache[i][(y<<1)+x] = nnz;
                t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}
static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                      int linesize, int uvlinesize, int simple)
{
    AV_COPY128(top_border, src_y + 15*linesize);
    if (!simple) {
        AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
        AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
    }
}
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                    int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
                    int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border-32;     // for TL prediction
    src_y  -=   linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a,b,xchg) do {                     \
        if (xchg) AV_SWAP64(b,a);               \
        else      AV_COPY64(b,a);               \
    } while (0)

    XCHG(top_border_m1+8, src_y-8, xchg);
    XCHG(top_border,      src_y,   xchg);
    XCHG(top_border+8,    src_y+8, 1);
    if (mb_x < mb_width-1)
        XCHG(top_border+32, src_y+16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1+16, src_cb-8, xchg);
        XCHG(top_border_m1+24, src_cr-8, xchg);
        XCHG(top_border+16, src_cb, 1);
        XCHG(top_border+24, src_cr, 1);
    }
}
static av_always_inline
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x)
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
    else
        return mb_y ? mode : LEFT_DC_PRED8x8;
}
static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x)
        return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
    else
        return mb_y ? mode : HOR_PRED8x8;
}
static av_always_inline
int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (mode == DC_PRED8x8) {
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    } else {
        return mode;
    }
}
static av_always_inline
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
{
    switch (mode) {
    case DC_PRED8x8:
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    case VERT_PRED8x8:
        return !mb_y ? DC_127_PRED8x8 : mode;
    case HOR_PRED8x8:
        return !mb_x ? DC_129_PRED8x8 : mode;
    case PLANE_PRED8x8 /*TM*/:
        return check_tm_pred8x8_mode(mode, mb_x, mb_y);
    }
    return mode;
}
static av_always_inline
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x)
        return mb_y ? VERT_VP8_PRED : DC_129_PRED;
    else
        return mb_y ? mode : HOR_VP8_PRED;
}
static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
{
    switch (mode) {
    case VERT_PRED:
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? DC_127_PRED : mode;
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? DC_129_PRED : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y);
    case DC_PRED:   // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
    }
    return mode;
}
static av_always_inline
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    AVCodecContext *avctx = s->avctx;
    int x, y, mode, nnz;
    uint32_t tr;

    // for the first row, we need to run xchg_mb_border to init the top edge to 127
    // otherwise, skip it if we aren't going to deblock
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
            mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
        } else {
            mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
        }
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        uint8_t tr_top[4] = { 127, 127, 127, 127 };

        // all blocks on the right edge of the macroblock use the bottom edge
        // of the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
            mb_x == s->mb_width-1) {
            tr = tr_right[-1]*0x01010101u;
            tr_right = (uint8_t *)&tr;
        }

        if (mb->skip)
            AV_ZERO128(td->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr+4*x;
                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];

                if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
                    mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
                    if (copy) {
                        dst = copy_dst + 12;
                        linesize = 8;
                        if (!(mb_y + y)) {
                            copy_dst[3] = 127U;
                            AV_WN32A(copy_dst+4, 127U * 0x01010101U);
                        } else {
                            AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
                            if (!(mb_x + x)) {
                                copy_dst[3] = 129U;
                            } else {
                                copy_dst[3] = ptr[4*x-s->linesize-1];
                            }
                        }
                        if (!(mb_x + x)) {
                            copy_dst[11] =
                            copy_dst[19] =
                            copy_dst[27] =
                            copy_dst[35] = 129U;
                        } else {
                            copy_dst[11] = ptr[4*x              -1];
                            copy_dst[19] = ptr[4*x+s->linesize  -1];
                            copy_dst[27] = ptr[4*x+s->linesize*2-1];
                            copy_dst[35] = ptr[4*x+s->linesize*3-1];
                        }
                    }
                } else {
                    mode = intra4x4[x];
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    AV_COPY32(ptr+4*x              , copy_dst+12);
                    AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
                    AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
                    AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
                }

                nnz = td->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr += 4*s->linesize;
            intra4x4 += 4;
        }
    }

    if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
        mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
    } else {
        mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
    }
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
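
/* Phases 2, 4 and 6 use the full six-tap filter and thus need 2 extra pixels
 * to the left/top and 3 to the right/bottom of the block; the odd phases use
 * a four-tap filter needing only 1 and 2, and phase 0 is a plain copy. */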
/**
 * luma MC function
 *
 * @param s VP8 decoding context
 * @param dst target buffer for block data at block position
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
                 ThreadFrame *ref, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, ptrdiff_t linesize,
                 vp8_mc_func mc_func[3][3])
{
    uint8_t *src = ref->f->data[0];

    if (AV_RN32A(mv)) {
        int src_linesize = linesize;
        int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        // edge emulation
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer, 32,
                                     src - my_idx * linesize - mx_idx, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src = td->edge_emu_buffer + mx_idx + 32 * my_idx;
            src_linesize = 32;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}
/**
 * chroma MC function
 *
 * @param s VP8 decoding context
 * @param dst1 target buffer for block data at block position (U plane)
 * @param dst2 target buffer for block data at block position (V plane)
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
                   ThreadFrame *ref, const VP56mv *mv, int x_off, int y_off,
                   int block_w, int block_h, int width, int height, ptrdiff_t linesize,
                   vp8_mc_func mc_func[3][3])
{
    uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];

    if (AV_RN32A(mv)) {
        int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
        int my = mv->y&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer, 32,
                                     src1 - my_idx * linesize - mx_idx, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src1 = td->edge_emu_buffer + mx_idx + 32 * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, 32, block_h, mx, my);

            s->vdsp.emulated_edge_mc(td->edge_emu_buffer, 32,
                                     src2 - my_idx * linesize - mx_idx, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src2 = td->edge_emu_buffer + mx_idx + 32 * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, 32, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}
static av_always_inline
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                 ThreadFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off,
                 int block_w, int block_h,
                 int width, int height, VP56mv *mv)
{
    VP56mv uvmv = *mv;

    /* Y */
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
                ref_frame, mv, x_off + bx_off, y_off + by_off,
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);

    /* U/V */
    if (s->profile == 3) {
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    x_off   >>= 1; y_off   >>= 1;
    bx_off  >>= 1; by_off  >>= 1;
    width   >>= 1; height  >>= 1;
    block_w >>= 1; block_h >>= 1;
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
}
/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref-1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4;
        int mx = (mb->mv.x>>2) + x_off + 8;
        int my = (mb->mv.y>>2) + y_off;
        uint8_t **src= s->framep[ref]->tf.f->data;
        int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
        s->vdsp.prefetch(src[0]+off, s->linesize, 4);
        off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
        s->vdsp.prefetch(src[1]+off, src[2]-src[1], 2);
    }
}
/**
 * Apply motion vectors to prediction buffer, chapter 18.
 */
static av_always_inline
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16*s->mb_width, height = 16*s->mb_height;
    ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
        break;
    case VP8_SPLITMVMODE_4x4: {
        int x, y;
        VP56mv uvmv;

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
                            ref, &bmv[4*y + x],
                            4*x + x_off, 4*y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
            }
        }

        /* U/V */
        x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
                         mb->bmv[ 2*y    * 4 + 2*x+1].x +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].x +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].x;
                uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
                         mb->bmv[ 2*y    * 4 + 2*x+1].y +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].y +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].y;
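                /* average of the four luma MVs for this chroma block,
                 * rounding halves away from zero: (x >> (INT_BIT-1))
                 * is -1 for negative sums and 0 otherwise */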
                uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
                uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
                if (s->profile == 3) {
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
                vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
                              dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
                              4*x + x_off, 4*y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
            }
        }
        break;
    }
    case VP8_SPLITMVMODE_16x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x16:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
        break;
    }
}
static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
                                     uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
            if (nnz4) {
                if (nnz4&~0x01010101) {
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;
                    }
                } else {
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
                }
            }
            y_dst += 4*s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1+ch];
            if (nnz4&~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            goto chroma_idct_end;
                    }
                    ch_dst += 4*s->uvlinesize;
                }
            } else {
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
            }
        }
chroma_idct_end: ;
    }
}
static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f)
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[mb->segment];
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];
        filter_level += s->lf_delta.mode[mb->mode];
    }

    filter_level = av_clip_uintp2(filter_level, 6);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit = interior_limit;
    f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
}
static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;
    int uvlinesize = s->uvlinesize;
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };

    if (!filter_level)
        return;

    bedge_lim  = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }

    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }
}
static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;

    if (!filter_level)
        return;

    bedge_lim  = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
    }
    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
    }
}
#define MARGIN (16 << 2)
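/* MV clamping works in quarter-pel units: one macroblock row/column is
 * 16 px = 64 qpel (hence the << 6 below), and MARGIN lets vectors point
 * up to 16 px outside the frame. */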
static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
                                   VP8Frame *prev_frame)
{
    VP8Context *s = avctx->priv_data;
    int mb_x, mb_y;

    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
        int mb_xy = mb_y*s->mb_width;

        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);

        s->mv_min.x = -MARGIN;
        s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            if (mb_y == 0)
                AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 1);
            s->mv_min.x -= 64;
            s->mv_max.x -= 64;
        }
        s->mv_min.y -= 64;
        s->mv_max.y -= 64;
    }
}
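
/* Sliced-threading synchronisation: each thread publishes its progress as a
 * packed (mb_y << 16 | mb_x) position and waits on the neighbouring thread's
 * condition variable until that thread has decoded the macroblocks this one
 * depends on. */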
#if HAVE_THREADS
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
    do {\
        int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);\
        if (otd->thread_mb_pos < tmp) {\
            pthread_mutex_lock(&otd->lock);\
            td->wait_mb_pos = tmp;\
            do {\
                if (otd->thread_mb_pos >= tmp)\
                    break;\
                pthread_cond_wait(&otd->cond, &otd->lock);\
            } while (1);\
            td->wait_mb_pos = INT_MAX;\
            pthread_mutex_unlock(&otd->lock);\
        }\
    } while(0);

#define update_pos(td, mb_y, mb_x)\
    do {\
        int pos              = (mb_y << 16) | (mb_x & 0xFFFF);\
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
        int is_null          = (next_td == NULL) || (prev_td == NULL);\
        int pos_check        = (is_null) ? 1 :\
                               (next_td != td && pos >= next_td->wait_mb_pos) ||\
                               (prev_td != td && pos >= prev_td->wait_mb_pos);\
        td->thread_mb_pos = pos;\
        if (sliced_threading && pos_check) {\
            pthread_mutex_lock(&td->lock);\
            pthread_cond_broadcast(&td->cond);\
            pthread_mutex_unlock(&td->lock);\
        }\
    } while(0);
#else
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)
#define update_pos(td, mb_y, mb_x)
#endif
static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
    int mb_y = td->thread_mb_pos>>16;
    int i, y, mb_x, mb_xy = mb_y*s->mb_width;
    int num_jobs = s->num_jobs;
    VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
    VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->tf.f->data[0] + 16*mb_y*s->linesize,
        curframe->tf.f->data[1] +  8*mb_y*s->uvlinesize,
        curframe->tf.f->data[2] +  8*mb_y*s->uvlinesize
    };
    if (mb_y == 0) prev_td = td;
    else           prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    if (mb_y == s->mb_height-1) next_td = td;
    else                        next_td = &s->thread_data[(jobnr + 1)%num_jobs];
    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
    else {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
        memset(mb - 1, 0, sizeof(*mb));   // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
    }

    memset(td->left_nnz, 0, sizeof(td->left_nnz));
    // left edge of 129 for intra prediction
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        for (i = 0; i < 3; i++)
            for (y = 0; y < 16>>!!i; y++)
                dst[i][y*curframe->tf.f->linesize[i]-1] = 129;
        if (mb_y == 1) // top left edge is also 129
            s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
    }

    s->mv_min.x = -MARGIN;
    s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        // Wait for previous thread to read mb_x+2, and reach mb_y-1.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
            } else {
                check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
            }
        }

        s->vdsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
        s->vdsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);

        if (!s->mb_layout)
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 0);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (!mb->skip) {
            idct_mb(s, td, dst, mb);
        } else {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned

            // Reset DC block predictors if they would exist if the mb had coefficients
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8] = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);

        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        s->mv_min.x -= 64;
        s->mv_max.x -= 64;

        if (mb_x == s->mb_width+1) {
            update_pos(td, mb_y, s->mb_width+3);
        } else {
            update_pos(td, mb_y, mb_x);
        }
    }
}
static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe->tf.f;
    VP8Macroblock *mb;
    VP8ThreadData *prev_td, *next_td;
    uint8_t *dst[3] = {
        curframe->data[0] + 16*mb_y*s->linesize,
        curframe->data[1] +  8*mb_y*s->uvlinesize,
        curframe->data[2] +  8*mb_y*s->uvlinesize
    };

    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
    else
        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;

    if (mb_y == 0) prev_td = td;
    else           prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    if (mb_y == s->mb_height-1) next_td = td;
    else                        next_td = &s->thread_data[(jobnr + 1)%num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];
        if (prev_td != td) {
            check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
        }
        if (next_td != td)
            if (next_td != &s->thread_data[0]) {
                check_thread_pos(td, next_td, mb_x+1, mb_y+1);
            }

        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        else
            filter_mb(s, dst, f, mb_x, mb_y);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width+3) + mb_x);
    }
}
static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[jobnr];
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
    VP8Frame *curframe = s->curframe;
    int mb_y, num_jobs = s->num_jobs;
    td->thread_nr = threadnr;
    for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
        if (mb_y >= s->mb_height) break;
        td->thread_mb_pos = mb_y<<16;
        vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
        if (s->deblock_filter)
            vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
        update_pos(td, mb_y, INT_MAX & 0xFFFF);

        s->mv_min.y -= 64;
        s->mv_max.y -= 64;

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            ff_thread_report_progress(&curframe->tf, mb_y, 0);
    }

    return 0;
}
int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                        AVPacket *avpkt)
{
    VP8Context *s = avctx->priv_data;
    int ret, i, referenced, num_jobs;
    enum AVDiscard skip_thresh;
    VP8Frame *av_uninit(curframe), *prev_frame;

    if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
        goto err;

    prev_frame = s->framep[VP56_FRAME_CURRENT];

    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
                                || s->update_altref == VP56_FRAME_CURRENT;

    skip_thresh = !referenced ? AVDISCARD_NONREF :
                  !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
        goto skip_decode;
    }
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // release no longer referenced frames
    for (i = 0; i < 5; i++)
        if (s->frames[i].tf.f->data[0] &&
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            vp8_release_frame(s, &s->frames[i]);

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
            break;
        }
    if (i == 5) {
        av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (curframe->tf.f->data[0])
        vp8_release_frame(s, curframe);

    // Given that arithmetic probabilities are updated every frame, it's quite likely
    // that the values we have on a random interframe are complete junk if we didn't
    // start decode on a keyframe. So just don't display anything rather than junk.
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN] ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
        goto err;
    }

    curframe->tf.f->key_frame = s->keyframe;
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
    if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
        goto err;

    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
    }
    if (s->update_golden != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
    }
    if (s->update_last) {
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    } else {
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
    }
    s->next_framep[VP56_FRAME_CURRENT] = curframe;

    ff_thread_finish_setup(avctx);

    s->linesize   = curframe->tf.f->linesize[0];
    s->uvlinesize = curframe->tf.f->linesize[1];

    memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
    /* Zero macroblock structures for top/top-left prediction from outside the frame. */
    if (!s->mb_layout)
        memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
    if (!s->mb_layout && s->keyframe)
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);

    // top edge of 127 for intra prediction
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        s->top_border[0][15] = s->top_border[0][23] = 127;
        s->top_border[0][31] = 127;
        memset(s->top_border[1], 127, s->mb_width*sizeof(*s->top_border));
    }
    memset(s->ref_count, 0, sizeof(s->ref_count));

    if (s->mb_layout == 1) {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, 1, 0);
        vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
    }

    if (avctx->active_thread_type == FF_THREAD_FRAME)
        num_jobs = 1;
    else
        num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
    s->num_jobs   = num_jobs;
    s->curframe   = curframe;
    s->prev_frame = prev_frame;
    s->mv_min.y   = -MARGIN;
    s->mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].thread_mb_pos = 0;
        s->thread_data[i].wait_mb_pos = INT_MAX;
    }
    avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);

    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
        if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
            return ret;
        *got_frame = 1;
    }

    return avpkt->size;
err:
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
    return ret;
}
av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int i;

    vp8_decode_flush_impl(avctx, 1);
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        av_frame_free(&s->frames[i].tf.f);

    return 0;
}
static av_cold int vp8_init_frames(VP8Context *s)
{
    int i;
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
        s->frames[i].tf.f = av_frame_alloc();
        if (!s->frames[i].tf.f)
            return AVERROR(ENOMEM);
    }
    return 0;
}
av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;
    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
    avctx->internal->allocate_progress = 1;

    ff_videodsp_init(&s->vdsp, 8);
    ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
    ff_vp8dsp_init(&s->vp8dsp);

    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
        return ret;
    }

    return 0;
}
static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;

    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
        return ret;
    }

    return 0;
}
#define REBASE(pic) \
    pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL

static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
    int i;

    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        free_buffers(s);
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    s->prob[0] = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
        if (s_src->frames[i].tf.f->data[0]) {
            int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
            if (ret < 0)
                return ret;
        }
    }

    s->framep[0] = REBASE(s_src->next_framep[0]);
    s->framep[1] = REBASE(s_src->next_framep[1]);
    s->framep[2] = REBASE(s_src->next_framep[2]);
    s->framep[3] = REBASE(s_src->next_framep[3]);

    return 0;
}
AVCodec ff_vp8_decoder = {
    .name                  = "vp8",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP8,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = ff_vp8_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = ff_vp8_decode_frame,
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
    .flush                 = vp8_decode_flush,
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
};