2 * VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Jason Garrett-Glaser
7 * Copyright (C) 2012 Daniel Kang
9 * This file is part of FFmpeg.
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/imgutils.h"
31 #include "rectangle.h"
/* Release the buffers hanging off the decoder context: the per-thread data
 * and the per-context macroblock / prediction-context arrays. */
38 static void free_buffers(VP8Context *s)
/* Walk every thread slot and tear down its condition variable, its mutex and
 * its filter_strength array. */
42 for (i = 0; i < MAX_THREADS; i++) {
44 pthread_cond_destroy(&s->thread_data[i].cond);
45 pthread_mutex_destroy(&s->thread_data[i].lock);
47 av_freep(&s->thread_data[i].filter_strength);
/* Free the context-level arrays. */
49 av_freep(&s->thread_data);
50 av_freep(&s->macroblocks_base);
51 av_freep(&s->intra4x4_pred_mode_top);
52 av_freep(&s->top_nnz);
53 av_freep(&s->top_border);
/* macroblocks is derived from macroblocks_base (see update_dimensions), which
 * was just freed, so it is cleared as well. */
55 s->macroblocks = NULL;
/* Obtain a picture buffer for frame f and allocate its zero-filled
 * segmentation map. A non-zero `ref` requests AV_GET_BUFFER_FLAG_REF. */
58 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
61 if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
62 ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
/* The map is mb_width * mb_height bytes. If it cannot be allocated, the
 * picture buffer obtained above is released and ENOMEM is returned. */
64 if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
65 ff_thread_release_buffer(s->avctx, &f->tf);
66 return AVERROR(ENOMEM);
/* Drop the frame's reference to its segmentation map and release its
 * picture buffer. */
71 static void vp8_release_frame(VP8Context *s, VP8Frame *f)
73 av_buffer_unref(&f->seg_map);
74 ff_thread_release_buffer(s->avctx, &f->tf);
/* Make dst reference the same picture and segmentation map as src. */
77 static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
/* Whatever dst held before is released first. */
81 vp8_release_frame(s, dst);
83 if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
/* If a new reference to the segmentation map cannot be created, dst is
 * released again and ENOMEM is returned. */
86 !(dst->seg_map = av_buffer_ref(src->seg_map))) {
87 vp8_release_frame(s, dst);
88 return AVERROR(ENOMEM);
/* Release every frame in s->frames and clear the framep pointer table.
 * NOTE(review): free_mem is not used in the lines visible here; presumably it
 * decides whether the context buffers are freed as well - confirm. */
95 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
97 VP8Context *s = avctx->priv_data;
100 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
101 vp8_release_frame(s, &s->frames[i]);
102 memset(s->framep, 0, sizeof(s->framep));
/* Flush entry point: forwards to vp8_decode_flush_impl() with free_mem = 0. */
108 static void vp8_decode_flush(AVCodecContext *avctx)
110 vp8_decode_flush_impl(avctx, 0);
/* Configure the decoder for a width x height frame: flush when the size
 * changed, derive the macroblock dimensions and allocate the per-context
 * arrays. Returns AVERROR(ENOMEM) when one of the checked allocations failed. */
113 static int update_dimensions(VP8Context *s, int width, int height)
115 AVCodecContext *avctx = s->avctx;
/* NOTE(review): && binds tighter than ||, so the macroblock-count comparison
 * only matters when macroblocks_base is already allocated. Explicit
 * parentheses would make that grouping obvious. */
118 if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
119 height != s->avctx->height) {
120 vp8_decode_flush_impl(s->avctx, 1);
122 ret = ff_set_dimensions(s->avctx, width, height);
/* Macroblock counts: coded dimensions rounded up to a multiple of 16. */
127 s->mb_width = (s->avctx->coded_width +15) / 16;
128 s->mb_height = (s->avctx->coded_height+15) / 16;
/* mb_layout is 1 only for slice threading with more than one usable thread
 * (the smaller of the coefficient partition count and thread_count). */
130 s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
131 if (!s->mb_layout) { // Frame threading and one thread
132 s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
133 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
135 else // Sliced threading
136 s->macroblocks_base = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
137 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
138 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
/* NOTE(review): thread_data is dereferenced in the loop below, and the NULL
 * check after the loop does not include it. Confirm that a failed allocation
 * here (and of filter_strength) is handled somewhere. */
139 s->thread_data = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));
141 for (i = 0; i < MAX_THREADS; i++) {
142 s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
144 pthread_mutex_init(&s->thread_data[i].lock, NULL);
145 pthread_cond_init(&s->thread_data[i].cond, NULL);
/* intra4x4_pred_mode_top is only required (and only allocated) when
 * mb_layout is 0. */
149 if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
150 (!s->intra4x4_pred_mode_top && !s->mb_layout))
151 return AVERROR(ENOMEM);
/* macroblocks starts one element into the allocation. */
153 s->macroblocks = s->macroblocks_base + 1;
/* Read the segmentation part of the frame header from the range coder s->c
 * into s->segmentation and s->prob->segmentid. */
158 static void parse_segment_info(VP8Context *s)
160 VP56RangeCoder *c = &s->c;
163 s->segmentation.update_map = vp8_rac_get(c);
165 if (vp8_rac_get(c)) { // update segment feature data
166 s->segmentation.absolute_vals = vp8_rac_get(c);
/* Four segments: a signed quantizer value (7-bit magnitude) and a signed
 * filter level (6-bit magnitude) for each. */
168 for (i = 0; i < 4; i++)
169 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
171 for (i = 0; i < 4; i++)
172 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
/* When the map is updated, each of the three segment-id probabilities is
 * either read as an 8-bit value or defaults to 255. */
174 if (s->segmentation.update_map)
175 for (i = 0; i < 3; i++)
176 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
/* Read loop-filter delta updates from s->c: four per-reference deltas, then
 * one per mode from MODE_I4x4 to VP8_MVMODE_SPLIT. A delta is only read when
 * its flag bit is set, as a 6-bit magnitude.
 * NOTE(review): the negations below are presumably guarded by a sign bit read
 * on lines not visible here - confirm. */
179 static void update_lf_deltas(VP8Context *s)
181 VP56RangeCoder *c = &s->c;
184 for (i = 0; i < 4; i++) {
185 if (vp8_rac_get(c)) {
186 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
189 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
193 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
194 if (vp8_rac_get(c)) {
195 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
198 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
/* Initialise one range decoder per DCT coefficient partition. buf points at
 * the partition size table, which is followed by the partition data. */
203 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
205 const uint8_t *sizes = buf;
/* The partition count is coded as a 2-bit log2 value, giving 1, 2, 4 or 8. */
208 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
/* Skip the size table: 3 bytes for every partition except the last. */
210 buf += 3*(s->num_coeff_partitions-1);
211 buf_size -= 3*(s->num_coeff_partitions-1);
215 for (i = 0; i < s->num_coeff_partitions-1; i++) {
/* Each size is a 24-bit little-endian value; it must fit in the data left. */
216 int size = AV_RL24(sizes + 3*i);
217 if (buf_size - size < 0)
220 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
/* The last partition has no size entry and takes the remaining bytes. */
224 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
/* Read the quantizer index and its five deltas from s->c and fill the
 * dequantization factors s->qmat[] for each of the four segments. */
229 static void get_quants(VP8Context *s)
231 VP56RangeCoder *c = &s->c;
/* 7-bit base index, then signed deltas with a 4-bit magnitude. */
234 int yac_qi = vp8_rac_get_uint(c, 7);
235 int ydc_delta = vp8_rac_get_sint(c, 4);
236 int y2dc_delta = vp8_rac_get_sint(c, 4);
237 int y2ac_delta = vp8_rac_get_sint(c, 4);
238 int uvdc_delta = vp8_rac_get_sint(c, 4);
239 int uvac_delta = vp8_rac_get_sint(c, 4);
241 for (i = 0; i < 4; i++) {
/* With segmentation enabled the segment supplies base_qi.
 * NOTE(review): the statement for the non-absolute case is not visible here;
 * presumably it adds yac_qi - confirm. */
242 if (s->segmentation.enabled) {
243 base_qi = s->segmentation.base_quant[i];
244 if (!s->segmentation.absolute_vals)
/* Every table index is clipped to 7 bits (0..127) before the lookup.
 * Element 0 of each pair comes from the DC table, element 1 from the AC
 * table. */
249 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
250 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)];
251 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
252 /* 101581>>16 is equivalent to 155/100 */
253 s->qmat[i].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
254 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
255 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
/* Range limits: the luma-DC AC factor is at least 8, the chroma DC factor at
 * most 132. */
257 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
258 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
263 * Determine which buffers golden and altref should be updated with after this frame.
264 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
266 * Intra frames update all 3 references
267 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
268 * If the update (golden|altref) flag is set, it's updated with the current frame
269 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
270 * If the flag is not set, the number read means:
272 * 1: VP56_FRAME_PREVIOUS
273 * 2: update golden with altref, or update altref with golden
/* Decide which frame the reference `ref` (golden or altref) is refreshed from.
 * NOTE(review): the condition on `update` and the case labels of the switch
 * are not visible here; the mapping below is read off the return statements. */
275 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
277 VP56RangeCoder *c = &s->c;
280 return VP56_FRAME_CURRENT;
/* Otherwise a 2-bit code read from the bitstream selects the source. */
282 switch (vp8_rac_get_uint(c, 2)) {
284 return VP56_FRAME_PREVIOUS;
/* Golden and altref are swapped: golden is updated from altref
 * (VP56_FRAME_GOLDEN2) and altref from golden. */
286 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
288 return VP56_FRAME_NONE;
/* Read the golden and altref update flags and store the resulting update
 * sources in s->update_golden and s->update_altref. */
291 static void update_refs(VP8Context *s)
293 VP56RangeCoder *c = &s->c;
/* Both flags are read before either call to ref_to_update(), which reads
 * further bits from the same range coder. */
295 int update_golden = vp8_rac_get(c);
296 int update_altref = vp8_rac_get(c);
298 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
299 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
/* Parse the frame header found at buf: the uncompressed frame tag, the
 * keyframe start code and dimensions, and the range-coded first partition
 * (segmentation, loop filter, partitions, quantizers, reference updates and
 * probability updates). Returns AVERROR_INVALIDDATA on malformed input. */
302 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
304 VP56RangeCoder *c = &s->c;
305 int header_size, hscale, vscale, i, j, k, l, m, ret;
306 int width = s->avctx->width;
307 int height = s->avctx->height;
/* Frame tag, a 24-bit little-endian word: bit 0 clear means keyframe,
 * bits 1-3 are the profile, bit 4 clear means invisible, and the remaining
 * upper bits are the size of the header partition.
 * NOTE(review): no check that buf_size covers these bytes is visible here. */
309 s->keyframe = !(buf[0] & 1);
310 s->profile = (buf[0]>>1) & 7;
311 s->invisible = !(buf[0] & 0x10);
312 header_size = AV_RL24(buf) >> 5;
317 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
320 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
321 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
322 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
/* A keyframe carries 7 extra bytes: the 3-byte start code and two 16-bit
 * dimension fields, parsed just below. */
324 if (header_size > buf_size - 7*s->keyframe) {
325 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
326 return AVERROR_INVALIDDATA;
330 if (AV_RL24(buf) != 0x2a019d) {
331 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
332 return AVERROR_INVALIDDATA;
/* Each dimension field holds the size in its low 14 bits and a scaling code
 * in the top 2 bits. */
334 width = AV_RL16(buf+3) & 0x3fff;
335 height = AV_RL16(buf+5) & 0x3fff;
336 hscale = buf[4] >> 6;
337 vscale = buf[6] >> 6;
341 if (hscale || vscale)
342 avpriv_request_sample(s->avctx, "Upscaling");
/* Reset to defaults: both reference updates take the current frame, the
 * token / prediction / MV probabilities are reloaded from the default tables,
 * and segmentation and loop-filter delta state is cleared.
 * NOTE(review): presumably keyframe-only; the enclosing condition is not
 * visible here. */
344 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
345 for (i = 0; i < 4; i++)
346 for (j = 0; j < 16; j++)
347 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
348 sizeof(s->prob->token[i][j]));
349 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
350 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
351 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
352 memset(&s->segmentation, 0, sizeof(s->segmentation));
353 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
/* The header partition is range-coded; whatever follows it is left for the
 * coefficient partitions. */
356 ff_vp56_init_range_decoder(c, buf, header_size);
358 buf_size -= header_size;
362 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
363 vp8_rac_get(c); // whether we can skip clamping in dsp functions
366 if ((s->segmentation.enabled = vp8_rac_get(c)))
367 parse_segment_info(s);
369 s->segmentation.update_map = 0; // FIXME: move this to some init function?
/* Loop filter parameters: type flag, 6-bit level, 3-bit sharpness. */
371 s->filter.simple = vp8_rac_get(c);
372 s->filter.level = vp8_rac_get_uint(c, 6);
373 s->filter.sharpness = vp8_rac_get_uint(c, 3);
375 if ((s->lf_delta.enabled = vp8_rac_get(c)))
379 if (setup_partitions(s, buf, buf_size)) {
380 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
381 return AVERROR_INVALIDDATA;
/* Buffers are (re)allocated on the first frame and whenever the pixel or
 * macroblock dimensions differ from the current configuration. */
384 if (!s->macroblocks_base || /* first frame */
385 width != s->avctx->width || height != s->avctx->height || (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) {
386 if ((ret = update_dimensions(s, width, height)) < 0)
394 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
395 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
398 // if we aren't saving this frame's probabilities for future frames,
399 // make a copy of the current probabilities
400 if (!(s->update_probabilities = vp8_rac_get(c)))
401 s->prob[1] = s->prob[0];
/* Keyframes always update the last-frame reference; for other frames a bit
 * is read. */
403 s->update_last = s->keyframe || vp8_rac_get(c);
/* Token probability updates are coded per band j; an update is written to
 * every coefficient position listed for that band (the list ends at a
 * negative entry). */
405 for (i = 0; i < 4; i++)
406 for (j = 0; j < 8; j++)
407 for (k = 0; k < 3; k++)
408 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
409 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
410 int prob = vp8_rac_get_uint(c, 8);
411 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
412 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
415 if ((s->mbskip_enabled = vp8_rac_get(c)))
416 s->prob->mbskip = vp8_rac_get_uint(c, 8);
/* NOTE(review): the probabilities below are presumably read for non-keyframes
 * only, and the two prediction-mode tables only when a flag bit is set; the
 * guarding conditions are not visible here. */
419 s->prob->intra = vp8_rac_get_uint(c, 8);
420 s->prob->last = vp8_rac_get_uint(c, 8);
421 s->prob->golden = vp8_rac_get_uint(c, 8);
424 for (i = 0; i < 4; i++)
425 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
427 for (i = 0; i < 3; i++)
428 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
430 // 17.2 MV probability update
431 for (i = 0; i < 2; i++)
432 for (j = 0; j < 19; j++)
433 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
434 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
/* Copy src into dst with each component clipped to the range
 * [s->mv_min, s->mv_max]. */
440 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
442 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
443 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
447 * Motion vector coding, 17.1.
/* Decode one signed motion-vector component from c using probability table p. */
449 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
/* p[0] selects the long form, where the magnitude bits are coded one by one
 * with p[9 + bit]: bits 0..2 first, then bits 9 down to 4. */
453 if (vp56_rac_get_prob_branchy(c, p[0])) {
456 for (i = 0; i < 3; i++)
457 x += vp56_rac_get_prob(c, p[9 + i]) << i;
458 for (i = 9; i > 3; i--)
459 x += vp56_rac_get_prob(c, p[9 + i]) << i;
/* Bit 3 uses p[12]; it is only read when some bit above bit 3 is set.
 * NOTE(review): the statement this condition guards is not visible here. */
460 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
/* Short form: decoded with the probabilities starting at p[2].
 * NOTE(review): the steps between these reads are not visible here. */
464 const uint8_t *ps = p+2;
465 bit = vp56_rac_get_prob(c, *ps);
468 bit = vp56_rac_get_prob(c, *ps);
471 x += vp56_rac_get_prob(c, *ps);
/* A sign bit, coded with p[1], is only read for a non-zero magnitude. */
474 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
/* Pick a row of vp8_submv_prob from the left and top neighbour motion
 * vectors, each passed as a packed 32-bit value.
 * NOTE(review): the conditions selecting between the three returns are not
 * visible here. In the first and last return, a non-zero `left` lowers the
 * row index by one. */
477 static av_always_inline
478 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
481 return vp8_submv_prob[4-!!left];
483 return vp8_submv_prob[2];
484 return vp8_submv_prob[1-!!left];
488 * Split motion vector prediction, 16.4.
489 * @returns the number of motion vectors parsed (2, 4 or 16)
/* Decode the partitioning and the per-partition motion vectors of a split-MV
 * macroblock into mb->partitioning and mb->bmv[]. */
491 static av_always_inline
492 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
496 VP8Macroblock *top_mb;
/* The left neighbour is the previous entry in the macroblock array. */
497 VP8Macroblock *left_mb = &mb[-1];
498 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
500 *mbsplits_cur, *firstidx;
502 VP56mv *left_mv = left_mb->bmv;
503 VP56mv *cur_mv = mb->bmv;
/* NOTE(review): only one of the two top-neighbour computations is visible;
 * this one steps back mb_width + 1 entries. */
505 if (!layout) // layout is inlined, s->mb_layout is not
508 top_mb = &mb[-s->mb_width-1];
509 mbsplits_top = vp8_mbsplits[top_mb->partitioning];
510 top_mv = top_mb->bmv;
/* Partitioning tree: up to three binary decisions pick one of the
 * VP8_SPLITMVMODE_* layouts. */
512 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
513 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
514 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
516 part_idx = VP8_SPLITMVMODE_8x8;
519 part_idx = VP8_SPLITMVMODE_4x4;
522 num = vp8_mbsplit_count[part_idx];
523 mbsplits_cur = vp8_mbsplits[part_idx],
524 firstidx = vp8_mbfirstidx[part_idx];
525 mb->partitioning = part_idx;
527 for (n = 0; n < num; n++) {
529 uint32_t left, above;
530 const uint8_t *submv_prob;
/* Fetch the MVs left of and above sub-block k as packed 32-bit values. They
 * come from the neighbouring macroblock (k + 3 in the left one, k + 12 in
 * the top one) or from this macroblock (k - 1, k - 4).
 * NOTE(review): the conditions choosing between the two are not visible. */
533 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
535 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
537 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
539 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
541 submv_prob = get_submv_prob(left, above);
/* Sub-MV tree. The four outcomes visible below are: a newly coded MV (the
 * macroblock MV plus two decoded components, y before x), a zero MV, a copy
 * of `above`, or a copy of `left`. */
543 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
544 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
545 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
546 mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
547 mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
549 AV_ZERO32(&mb->bmv[n]);
552 AV_WN32A(&mb->bmv[n], above);
555 AV_WN32A(&mb->bmv[n], left);
/* Decode the inter prediction mode and motion vector of macroblock mb, using
 * the top, left and top-left neighbours as context. */
562 static av_always_inline
563 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
565 VP8Macroblock *mb_edge[3] = { 0 /* top */,
568 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
569 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
571 int cur_sign_bias = s->sign_bias[mb->ref_frame];
572 int8_t *sign_bias = s->sign_bias;
574 uint8_t cnt[4] = { 0 };
575 VP56RangeCoder *c = &s->c;
/* NOTE(review): only one of the two neighbour-pointer computations is
 * visible; this one places the top neighbour mb_width + 1 entries back and
 * the top-left one mb_width + 2 entries back. */
577 if (!layout) { // layout is inlined (s->mb_layout is not)
582 mb_edge[0] = mb - s->mb_width-1;
583 mb_edge[2] = mb - s->mb_width-2;
/* All candidate MVs start out as zero. */
586 AV_ZERO32(&near_mv[0]);
587 AV_ZERO32(&near_mv[1]);
588 AV_ZERO32(&near_mv[2]);
/* MV_EDGE_CHECK(n) looks at neighbour n. If its ref_frame is not
 * VP56_FRAME_CURRENT, its MV is read as a packed 32-bit value and negated
 * when the neighbour's sign bias differs from the current one. A new
 * candidate is stored when n is 0 or the MV differs from near_mv[idx], and a
 * counter is raised by 2 for the top and left neighbours or by 1 for the
 * top-left one (n == 2). */
590 /* Process MB on top, left and top-left */
591 #define MV_EDGE_CHECK(n)\
593 VP8Macroblock *edge = mb_edge[n];\
594 int edge_ref = edge->ref_frame;\
595 if (edge_ref != VP56_FRAME_CURRENT) {\
596 uint32_t mv = AV_RN32A(&edge->mv);\
598 if (cur_sign_bias != sign_bias[edge_ref]) {\
599 /* SWAR negate of the values in mv. */\
601 mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
603 if (!n || mv != AV_RN32A(&near_mv[idx]))\
604 AV_WN32A(&near_mv[++idx], mv);\
605 cnt[idx] += 1 + (n != 2);\
607 cnt[CNT_ZERO] += 1 + (n != 2);\
615 mb->partitioning = VP8_SPLITMVMODE_NONE;
/* Mode tree: every decision uses a probability from vp8_mode_contexts,
 * indexed by one of the counters gathered above. */
616 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
617 mb->mode = VP8_MVMODE_MV;
619 /* If we have three distinct MVs, merge first and last if they're the same */
620 if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
621 cnt[CNT_NEAREST] += 1;
623 /* Swap near and nearest if necessary */
624 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
625 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
626 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
629 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
630 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
632 /* Choose the best mv out of 0,0 and the nearest mv */
633 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
/* cnt[CNT_SPLITMV] is reused as the split-mode context: neighbours in split
 * mode are counted, with the left and top ones weighted 2 and the top-left
 * one weighted 1. */
634 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
635 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
636 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
638 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
639 mb->mode = VP8_MVMODE_SPLIT;
/* decode_splitmvs() returns the number of sub-MVs it parsed; the macroblock
 * MV becomes the last of them. */
640 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
/* New MV: two decoded components (y before x) are added to the predicted
 * MV. */
642 mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
643 mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
647 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
651 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
655 mb->mode = VP8_MVMODE_ZERO;
/* Decode the sixteen 4x4 intra prediction modes of a macroblock into
 * mb->intra4x4_pred_mode_mb.
 * NOTE(review): presumably `keyframe` selects the context-dependent path and
 * `layout` selects where the top context lives; the conditions themselves
 * are not visible here. */
661 static av_always_inline
662 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
663 int mb_x, int keyframe, int layout)
665 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
/* Per-macroblock top context: seeded from the macroblock mb_width + 1
 * entries back. */
668 VP8Macroblock *mb_top = mb - s->mb_width - 1;
669 memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
674 uint8_t* const left = s->intra4x4_pred_mode_left;
/* The top context is either stored in the macroblock or in the shared array,
 * which holds 4 entries per macroblock column. */
676 top = mb->intra4x4_pred_mode_top;
678 top = s->intra4x4_pred_mode_top + 4 * mb_x;
/* Context-dependent path: the probability set for each sub-block is chosen
 * by the modes above and to the left of it, and the decoded mode then
 * replaces both context entries. */
679 for (y = 0; y < 4; y++) {
680 for (x = 0; x < 4; x++) {
682 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
683 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
684 left[y] = top[x] = *intra4x4;
/* Context-free path: all 16 modes use the fixed vp8_pred4x4_prob_inter set. */
690 for (i = 0; i < 16; i++)
691 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
/* Decode the macroblock-level header of mb: segment id, skip flag,
 * prediction mode and reference frame, and for inter macroblocks the motion
 * vectors. `segment` points at this macroblock's segmentation-map entry and
 * `ref` at the corresponding entry of a reference map (may be NULL). */
695 static av_always_inline
696 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
697 uint8_t *segment, uint8_t *ref, int layout)
699 VP56RangeCoder *c = &s->c;
/* The segment id is two bits: the first picks which of segmentid[1] and
 * segmentid[2] codes the second. Without a map update the id is taken from
 * `ref` when one is given, otherwise left unchanged. */
701 if (s->segmentation.update_map) {
702 int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
703 *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
704 } else if (s->segmentation.enabled)
705 *segment = ref ? *ref : *segment;
706 mb->segment = *segment;
708 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
/* First branch: intra macroblock coded with the fixed *_intra trees and
 * probabilities. NOTE(review): presumably the keyframe case; the opening
 * condition is not visible here. */
711 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
713 if (mb->mode == MODE_I4x4) {
714 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
/* For a whole-macroblock mode, the matching 4x4 mode is replicated into the
 * four top and four left context entries. */
716 const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
717 if (s->mb_layout == 1)
718 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
720 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
721 AV_WN32A( s->intra4x4_pred_mode_left, modes);
724 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
725 mb->ref_frame = VP56_FRAME_CURRENT;
/* Second branch: inter macroblock. Up to two further decisions choose
 * between the previous frame, golden and altref. */
726 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
728 if (vp56_rac_get_prob_branchy(c, s->prob->last))
729 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
730 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
732 mb->ref_frame = VP56_FRAME_PREVIOUS;
/* Count how often each reference frame is used. */
733 s->ref_count[mb->ref_frame-1]++;
735 // motion vectors, 16.3
736 decode_mvs(s, mb, mb_x, mb_y, layout);
/* Remaining branch: intra macroblock coded with the per-frame probabilities
 * held in s->prob. */
739 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
741 if (mb->mode == MODE_I4x4)
742 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
744 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
745 mb->ref_frame = VP56_FRAME_CURRENT;
746 mb->partitioning = VP8_SPLITMVMODE_NONE;
747 AV_ZERO32(&mb->bmv[0]);
751 #ifndef decode_block_coeffs_internal
753 * @param r arithmetic bitstream reader context
754 * @param block destination for block coefficients
755 * @param probs probabilities to use when reading trees from the bitstream
756 * @param i initial coeff index, 0 unless a separate DC block is coded
757 * @param qmul array holding the dc/ac dequant factor at position 0/1
758 * @return 0 if no coeffs were decoded
759 * otherwise, the index of the last coeff decoded plus one
/* Token-decoding loop for one 4x4 block. decode_block_coeffs() calls it with
 * token_prob already chosen for position i, after its own end-of-block test
 * has passed. */
761 static int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
762 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
763 int i, uint8_t *token_prob, int16_t qmul[2])
/* Decoding works on a local copy of the range coder state.
 * NOTE(review): presumably it is written back to *r before returning; that
 * line is not visible here. */
765 VP56RangeCoder c = *r;
769 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
773 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
775 break; // invalid input; blocks should end with EOB
/* The next token's probability context is index 0 after a zero, 1 after a
 * one, and 2 after anything larger. */
776 token_prob = probs[i][0];
780 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
782 token_prob = probs[i+1][1];
784 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
785 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
787 coeff += vp56_rac_get_prob(&c, token_prob[5]);
791 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
792 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
793 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
796 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
797 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
799 } else { // DCT_CAT3 and up
/* Two bits select the category (0..3); its base value 3 + (8 << cat) is
 * 11, 19, 35 or 67, and the extra bits are read with that category's
 * probability table. */
800 int a = vp56_rac_get_prob(&c, token_prob[8]);
801 int b = vp56_rac_get_prob(&c, token_prob[9+a]);
802 int cat = (a<<1) + b;
803 coeff = 3 + (8<<cat);
804 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
807 token_prob = probs[i+1][2];
/* Apply the sign bit, dequantize with qmul[0] at position 0 and qmul[1]
 * elsewhere, and store at the zigzag-mapped position. */
809 block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
818 * @param c arithmetic bitstream reader context
819 * @param block destination for block coefficients
820 * @param probs probabilities to use when reading trees from the bitstream
821 * @param i initial coeff index, 0 unless a separate DC block is coded
822 * @param zero_nhood the initial prediction context for number of surrounding
823 * all-zero blocks (only left/top, so 0-2)
824 * @param qmul array holding the dc/ac dequant factor at position 0/1
825 * @return 0 if no coeffs were decoded
826 * otherwise, the index of the last coeff decoded plus one
/* Inline front end for coefficient decoding: picks the token probabilities
 * for the starting position and context, tests for an immediate end of block,
 * and otherwise hands over to decode_block_coeffs_internal(). */
828 static av_always_inline
829 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
830 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
831 int i, int zero_nhood, int16_t qmul[2])
833 uint8_t *token_prob = probs[i][zero_nhood];
834 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
836 return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
/* Decode all residual coefficients of one macroblock into td->block and
 * record per-block counts in td->non_zero_count_cache. t_nnz and l_nnz are
 * the top and left "has coefficients" contexts; both are read and updated. */
839 static av_always_inline
840 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
841 uint8_t t_nnz[9], uint8_t l_nnz[9])
843 int i, x, y, luma_start = 0, luma_ctx = 3;
844 int nnz_pred, nnz, nnz_total = 0;
845 int segment = mb->segment;
/* Every mode except MODE_I4x4 and VP8_MVMODE_SPLIT has a separate luma DC
 * block; its context is entry 8 of the nnz arrays. */
848 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
849 nnz_pred = t_nnz[8] + l_nnz[8];
851 // decode DC values and do hadamard
852 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
853 s->qmat[segment].luma_dc_qmul);
854 l_nnz[8] = t_nnz[8] = !!nnz;
/* The transform of the DC block has a DC-only variant and a full variant.
 * NOTE(review): the condition choosing between them is not visible here. */
859 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
861 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
/* 16 luma blocks: the context is the sum of the left flag for row y and the
 * top flag for column x. */
868 for (y = 0; y < 4; y++)
869 for (x = 0; x < 4; x++) {
870 nnz_pred = l_nnz[y] + t_nnz[x];
871 nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
872 nnz_pred, s->qmat[segment].luma_qmul);
873 // nnz+block_dc may be one more than the actual last index, but we don't care
874 td->non_zero_count_cache[y][x] = nnz + block_dc;
875 t_nnz[x] = l_nnz[y] = !!nnz;
880 // TODO: what to do about dimensions? 2nd dim for luma is x,
881 // but for chroma it's (y<<1)|x
/* Chroma: block rows 4 and 5 of td->block hold four blocks each, decoded
 * with token set 2 and contexts at i + 2*y (left) and i + 2*x (top). */
882 for (i = 4; i < 6; i++)
883 for (y = 0; y < 2; y++)
884 for (x = 0; x < 2; x++) {
885 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
886 nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
887 nnz_pred, s->qmat[segment].chroma_qmul);
888 td->non_zero_count_cache[i][(y<<1)+x] = nnz;
889 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
893 // if there were no coded coeffs despite the macroblock not being marked skip,
894 // we MUST not do the inner loop filter and should not do IDCT
895 // Since skip isn't used for bitstream prediction, just manually set it.
/* Save the bottom pixel row of a macroblock into top_border: 16 luma bytes
 * from row 15, then 8 Cb bytes and 8 Cr bytes from chroma row 7.
 * NOTE(review): the chroma copies are presumably skipped when `simple` is
 * set; the condition is not visible here. */
900 static av_always_inline
901 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
902 int linesize, int uvlinesize, int simple)
904 AV_COPY128(top_border, src_y + 15*linesize);
906 AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
907 AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
/* Exchange or copy 8-byte groups between the saved top-border entries and
 * the picture row addressed through src_y / src_cb / src_cr. */
911 static av_always_inline
912 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
913 int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
914 int simple, int xchg)
/* The entry 32 bytes earlier supplies the pixels to the left of this
 * macroblock's top edge. */
916 uint8_t *top_border_m1 = top_border-32; // for TL prediction
/* Step the chroma pointers up one row.
 * NOTE(review): src_y is presumably adjusted the same way on a line not
 * visible here. */
918 src_cb -= uvlinesize;
919 src_cr -= uvlinesize;
/* XCHG swaps the two 8-byte groups when its third argument is non-zero and
 * otherwise copies a into b. */
921 #define XCHG(a,b,xchg) do { \
922 if (xchg) AV_SWAP64(b,a); \
923 else AV_COPY64(b,a); \
/* The top-left group and the first 8 luma bytes follow the xchg argument; the
 * second 8 luma bytes and the group belonging to the next macroblock are
 * always swapped. */
926 XCHG(top_border_m1+8, src_y-8, xchg);
927 XCHG(top_border, src_y, xchg);
928 XCHG(top_border+8, src_y+8, 1);
929 if (mb_x < mb_width-1)
930 XCHG(top_border+32, src_y+16, 1);
932 // only copy chroma for normal loop filter
933 // or to initialize the top row to 127
934 if (!simple || !mb_y) {
935 XCHG(top_border_m1+16, src_cb-8, xchg);
936 XCHG(top_border_m1+24, src_cr-8, xchg);
937 XCHG(top_border+16, src_cb, 1);
938 XCHG(top_border+24, src_cr, 1);
/* Replace a DC prediction mode by a variant chosen from the macroblock
 * position. In the first return a zero mb_y yields DC_128_PRED8x8 and
 * otherwise TOP_DC_PRED8x8; in the second a zero mb_y yields
 * LEFT_DC_PRED8x8 and otherwise the mode is unchanged.
 * NOTE(review): the condition choosing between the two returns is not
 * visible here; presumably it tests mb_x. */
942 static av_always_inline
943 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
946 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
948 return mb_y ? mode : LEFT_DC_PRED8x8;
/* Replace a TM prediction mode by a variant chosen from the macroblock
 * position. In the first return a zero mb_y yields DC_129_PRED8x8 and
 * otherwise VERT_PRED8x8; in the second a zero mb_y yields HOR_PRED8x8 and
 * otherwise the mode is unchanged.
 * NOTE(review): the condition choosing between the two returns is not
 * visible here; presumably it tests mb_x. */
952 static av_always_inline
953 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
956 return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
958 return mb_y ? mode : HOR_PRED8x8;
/* Adjust an intra prediction mode for the macroblock position: DC_PRED8x8
 * goes through check_dc_pred8x8_mode().
 * NOTE(review): the handling of other modes is not visible here. */
962 static av_always_inline
963 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
965 if (mode == DC_PRED8x8) {
966 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
/* Variant of check_intra_pred8x8_mode() used when CODEC_FLAG_EMU_EDGE is set
 * (see intra_predict). Visible substitutions: DC_127_PRED8x8 when mb_y is 0,
 * DC_129_PRED8x8 when mb_x is 0, and the DC and TM helpers for their modes.
 * NOTE(review): most case labels of the switch are not visible here, so which
 * mode maps to which return has to be confirmed against the full file. */
972 static av_always_inline
973 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
977 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
979 return !mb_y ? DC_127_PRED8x8 : mode;
981 return !mb_x ? DC_129_PRED8x8 : mode;
982 case PLANE_PRED8x8 /*TM*/:
983 return check_tm_pred8x8_mode(mode, mb_x, mb_y);
/* 4x4 counterpart of check_tm_pred8x8_mode(). In the first return a zero
 * mb_y yields DC_129_PRED and otherwise VERT_VP8_PRED; in the second a zero
 * mb_y yields HOR_VP8_PRED and otherwise the mode is unchanged.
 * NOTE(review): the condition choosing between the two returns is not
 * visible here; presumably it tests mb_x. */
988 static av_always_inline
989 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
992 return mb_y ? VERT_VP8_PRED : DC_129_PRED;
994 return mb_y ? mode : HOR_VP8_PRED;
/* Adjust a 4x4 intra prediction mode for blocks at the picture edge.
 * intra_predict() passes the address of a flag as copy_buf and predicts into
 * a temporary buffer when that flag ends up set.
 * NOTE(review): several case labels and the statements that write *copy_buf
 * are not visible here. */
998 static av_always_inline
999 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
1003 if (!mb_x && mb_y) {
1008 case DIAG_DOWN_LEFT_PRED:
1009 case VERT_LEFT_PRED:
/* These two modes become DC_127_PRED when mb_y is 0. */
1010 return !mb_y ? DC_127_PRED : mode;
1018 return !mb_x ? DC_129_PRED : mode;
1020 return check_tm_pred4x4_mode(mode, mb_x, mb_y);
1021 case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
1022 case DIAG_DOWN_RIGHT_PRED:
1023 case VERT_RIGHT_PRED:
/* Intra-predict one macroblock into dst[0..2] (luma, then two chroma planes)
 * and, for 4x4 modes, add the residual of each sub-block right after it has
 * been predicted. */
1032 static av_always_inline
1033 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1034 VP8Macroblock *mb, int mb_x, int mb_y)
1036 AVCodecContext *avctx = s->avctx;
1037 int x, y, mode, nnz;
1040 // for the first row, we need to run xchg_mb_border to init the top edge to 127
1041 // otherwise, skip it if we aren't going to deblock
/* This call passes xchg = 1; the matching call at the end of the function
 * passes xchg = 0. Both are restricted to thread 0. */
1042 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1043 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1044 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1045 s->filter.simple, 1);
/* Modes below MODE_I4x4 predict the whole 16x16 luma block at once. */
1047 if (mb->mode < MODE_I4x4) {
1048 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
1049 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
1051 mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
1053 s->hpc.pred16x16[mode](dst[0], s->linesize);
1055 uint8_t *ptr = dst[0];
1056 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1057 uint8_t tr_top[4] = { 127, 127, 127, 127 };
1059 // all blocks on the right edge of the macroblock use bottom edge
1060 // the top macroblock for their topright edge
1061 uint8_t *tr_right = ptr - s->linesize + 16;
1063 // if we're on the right edge of the frame, said edge is extended
1064 // from the top macroblock
1065 if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
1066 mb_x == s->mb_width-1) {
/* Replicate the last pixel of the row above into all four bytes of tr. */
1067 tr = tr_right[-1]*0x01010101u;
1068 tr_right = (uint8_t *)&tr;
1072 AV_ZERO128(td->non_zero_count_cache);
1074 for (y = 0; y < 4; y++) {
1075 uint8_t *topright = ptr + 4 - s->linesize;
1076 for (x = 0; x < 4; x++) {
1077 int copy = 0, linesize = s->linesize;
1078 uint8_t *dst = ptr+4*x;
/* Scratch block of 5 rows x 8 bytes. As used below: the top row sits at
 * bytes 4..7 with the top-left pixel at byte 3, the left column at bytes
 * 11/19/27/35, and the four predicted rows start at bytes 12/20/28/36. */
1079 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
1081 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
1084 topright = tr_right;
1086 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
/* NOTE(review): the last argument reads "©" here, which looks like a mangled
 * "&copy" (the address of the local `copy` declared above) - confirm against
 * the original file. */
1087 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, ©);
/* When the scratch block is used, its edge bytes are filled either from the
 * picture or with constants (127 for the top row, 129 for the left column).
 * NOTE(review): the conditions choosing between the two are not visible. */
1089 dst = copy_dst + 12;
1093 AV_WN32A(copy_dst+4, 127U * 0x01010101U);
1095 AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
1099 copy_dst[3] = ptr[4*x-s->linesize-1];
1106 copy_dst[35] = 129U;
1108 copy_dst[11] = ptr[4*x -1];
1109 copy_dst[19] = ptr[4*x+s->linesize -1];
1110 copy_dst[27] = ptr[4*x+s->linesize*2-1];
1111 copy_dst[35] = ptr[4*x+s->linesize*3-1];
1117 s->hpc.pred4x4[mode](dst, topright, linesize);
/* Copy the four predicted rows from the scratch block back to the picture. */
1119 AV_COPY32(ptr+4*x , copy_dst+12);
1120 AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
1121 AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
1122 AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
/* Add the residual with either the DC-only or the full inverse transform.
 * NOTE(review): the tests on nnz choosing between them are not visible. */
1125 nnz = td->non_zero_count_cache[y][x];
1128 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
1130 s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
1135 ptr += 4*s->linesize;
/* Both chroma planes use the same adjusted chroma mode. */
1140 if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1141 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
1143 mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
1145 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1146 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1148 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1149 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1150 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1151 s->filter.simple, 0);
/* Lookup table indexed by the 3-bit sub-pel phase computed in vp8_mc_luma()
 * as (mv << 1) & 7. Row 0 is also used there as the index into the MC
 * function table. */
1154 static const uint8_t subpel_idx[3][8] = {
1155 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1156 // also function pointer index
1157 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1158 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1164 * @param s VP8 decoding context
1165 * @param dst target buffer for block data at block position
1166 * @param ref reference picture buffer at origin (0, 0)
1167 * @param mv motion vector (relative to block position) to get pixel data from
1168 * @param x_off horizontal position of block from origin (0, 0)
1169 * @param y_off vertical position of block from origin (0, 0)
1170 * @param block_w width of block (16, 8 or 4)
1171 * @param block_h height of block (always same as block_w)
1172 * @param width width of src/dst plane data
1173 * @param height height of src/dst plane data
1174 * @param linesize size of a single line of plane data, including padding
1175 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
/* Luma motion compensation for one block: fetch the block addressed by mv
 * from the reference picture and write it to dst, using edge emulation when
 * the filter footprint leaves the plane. */
1177 static av_always_inline
1178 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1179 ThreadFrame *ref, const VP56mv *mv,
1180 int x_off, int y_off, int block_w, int block_h,
1181 int width, int height, ptrdiff_t linesize,
1182 vp8_mc_func mc_func[3][3])
1184 uint8_t *src = ref->f->data[0];
1187 int src_linesize = linesize;
/* mx/my are the sub-pel phases; mx_idx/my_idx are both the number of extra
 * pixels needed on the left/top and the index of the MC function. */
1188 int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1189 int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
/* Add the whole-pixel part of the motion vector (mv >> 2) to the block
 * position. */
1191 x_off += mv->x >> 2;
1192 y_off += mv->y >> 2;
/* Wait for the reference frame to reach the required progress. The value
 * passed is the lowest pixel row the filter reads, plus 3, divided by 16. */
1195 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1196 src += y_off * linesize + x_off;
/* If any pixel the filter needs lies outside the plane, build a padded copy
 * in td->edge_emu_buffer and read from there instead. */
1197 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1198 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1199 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1200 src - my_idx * linesize - mx_idx,
1202 block_w + subpel_idx[1][mx],
1203 block_h + subpel_idx[1][my],
1204 x_off - mx_idx, y_off - my_idx, width, height);
/* NOTE(review): 32 is presumably the stride of edge_emu_buffer, set together
 * with src_linesize on lines not visible here - confirm. */
1205 src = td->edge_emu_buffer + mx_idx + 32 * my_idx;
1208 mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
/* Path without sub-pel filtering: plain copy through mc_func[0][0].
 * NOTE(review): the condition selecting this path is not visible here. */
1210 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1211 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
1216 * chroma MC function
1218 * @param s VP8 decoding context
1219 * @param dst1 target buffer for block data at block position (U plane)
1220 * @param dst2 target buffer for block data at block position (V plane)
1221 * @param ref reference picture buffer at origin (0, 0)
1222 * @param mv motion vector (relative to block position) to get pixel data from
1223 * @param x_off horizontal position of block from origin (0, 0)
1224 * @param y_off vertical position of block from origin (0, 0)
1225 * @param block_w width of block in chroma samples (8 or 4)
1226 * @param block_h height of block in chroma samples (8 or 4; may differ from block_w)
1227 * @param width width of src/dst plane data
1228 * @param height height of src/dst plane data
1229 * @param linesize size of a single line of plane data, including padding
1230 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
/*
 * Chroma motion compensation for one block, applied to both chroma planes.
 * Plane 1 of ref is predicted into dst1 and plane 2 into dst2, using the same
 * vector, offsets and MC function for both.
 * NOTE(review): braces, the test that selects between the subpel and the
 * plain-copy path, and two call arguments are absent from this listing.
 */
1232 static av_always_inline
1233 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
1234 ThreadFrame *ref, const VP56mv *mv, int x_off, int y_off,
1235 int block_w, int block_h, int width, int height, ptrdiff_t linesize,
1236 vp8_mc_func mc_func[3][3])
1238 uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
/* The low three bits of the vector are the subpel fraction; the rest is the
 * whole-sample displacement added to the block position. */
1241 int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1242 int my = mv->y&7, my_idx = subpel_idx[0][my];
1244 x_off += mv->x >> 3;
1245 y_off += mv->y >> 3;
1248 src1 += y_off * linesize + x_off;
1249 src2 += y_off * linesize + x_off;
/* Wait for the reference frame to report progress covering the last chroma
 * source row needed. */
1250 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
/* Source area reaches outside the plane: emulate the edges. The single scratch
 * buffer is reused, so the first plane is fully predicted before the second
 * plane's source is built. */
1251 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1252 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1253 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1254 src1 - my_idx * linesize - mx_idx,
1256 block_w + subpel_idx[1][mx],
1257 block_h + subpel_idx[1][my],
1258 x_off - mx_idx, y_off - my_idx, width, height);
1259 src1 = td->edge_emu_buffer + mx_idx + 32 * my_idx;
1260 mc_func[my_idx][mx_idx](dst1, linesize, src1, 32, block_h, mx, my);
1262 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1263 src2 - my_idx * linesize - mx_idx,
1265 block_w + subpel_idx[1][mx],
1266 block_h + subpel_idx[1][my],
1267 x_off - mx_idx, y_off - my_idx, width, height);
1268 src2 = td->edge_emu_buffer + mx_idx + 32 * my_idx;
1269 mc_func[my_idx][mx_idx](dst2, linesize, src2, 32, block_h, mx, my);
/* Source area is fully inside the plane: filter directly from the frame. */
1271 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1272 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
/* Other path: plain block fetch with the [0][0] function and zero fractions. */
1275 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1276 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1277 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
/*
 * Predict one partition of a macroblock: luma first, then both chroma planes.
 * (x_off, y_off) is the macroblock position, (bx_off, by_off) the position of
 * the partition inside the macroblock, block_w/block_h its luma size and
 * width/height the luma plane size passed on to the MC helpers.
 * NOTE(review): the declaration and derivation of the chroma vector (uvmv),
 * including the body of the profile == 3 branch, are absent from this listing.
 */
1281 static av_always_inline
1282 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1283 ThreadFrame *ref_frame, int x_off, int y_off,
1284 int bx_off, int by_off,
1285 int block_w, int block_h,
1286 int width, int height, VP56mv *mv)
/* Luma: the MC function table is selected by whether the block is 8 wide. */
1291 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1292 ref_frame, mv, x_off + bx_off, y_off + by_off,
1293 block_w, block_h, width, height, s->linesize,
1294 s->put_pixels_tab[block_w == 8]);
1297 if (s->profile == 3) {
/* Chroma is processed at half the luma offsets and dimensions. */
1301 x_off >>= 1; y_off >>= 1;
1302 bx_off >>= 1; by_off >>= 1;
1303 width >>= 1; height >>= 1;
1304 block_w >>= 1; block_h >>= 1;
/* block_w has already been halved here, so the table index is 1 for 8-wide
 * and 2 for 4-wide chroma blocks. */
1305 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1306 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1307 &uvmv, x_off + bx_off, y_off + by_off,
1308 block_w, block_h, width, height, s->uvlinesize,
1309 s->put_pixels_tab[1 + (block_w == 4)]);
/* Issues cache prefetches into reference frame `ref` around the position the
 * current macroblock's vector points to. Nothing is read back, so the call
 * has no effect on decoder output. */
1312 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1313 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1314 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1316 /* Don't prefetch refs that haven't been used very often this frame. */
1317 if (s->ref_count[ref-1] > (mb_xy >> 5)) {
1318 int x_off = mb_x << 4, y_off = mb_y << 4;
1319 int mx = (mb->mv.x>>2) + x_off + 8;
1320 int my = (mb->mv.y>>2) + y_off;
1321 uint8_t **src= s->framep[ref]->tf.f->data;
/* Luma: the row offset rotates with mb_x&3 and the column is pushed 64 bytes
 * ahead of the estimated position. */
1322 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1323 /* For threading, a ff_thread_await_progress here might be useful, but
1324 * it actually slows down the decoder. Since a bad prefetch doesn't
1325 * generate bad decoder output, we don't run it here. */
1326 s->vdsp.prefetch(src[0]+off, s->linesize, 4);
/* Chroma: the stride passed is the distance between the two chroma planes,
 * so the two prefetched lines land in plane 1 and plane 2 respectively. */
1327 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1328 s->vdsp.prefetch(src[1]+off, src[2]-src[1], 2);
1333 * Apply motion vectors to prediction buffer, chapter 18.
/*
 * Build the inter prediction of one macroblock into dst[0..2].
 * The reference frame is chosen by mb->ref_frame and the work is dispatched
 * on mb->partitioning: one 16x16 block, sixteen 4x4 luma blocks with four
 * 4x4 chroma blocks, two 16x8, two 8x16 or four 8x8 partitions.
 * NOTE(review): the `break` statements, local declarations of the 4x4 case
 * and the body of its profile == 3 branch are absent from this listing.
 */
1335 static av_always_inline
1336 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1337 VP8Macroblock *mb, int mb_x, int mb_y)
1339 int x_off = mb_x << 4, y_off = mb_y << 4;
1340 int width = 16*s->mb_width, height = 16*s->mb_height;
1341 ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1342 VP56mv *bmv = mb->bmv;
1344 switch (mb->partitioning) {
1345 case VP8_SPLITMVMODE_NONE:
1346 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1347 0, 0, 16, 16, width, height, &mb->mv);
1349 case VP8_SPLITMVMODE_4x4: {
/* Luma: one MC call per 4x4 block. */
1354 for (y = 0; y < 4; y++) {
1355 for (x = 0; x < 4; x++) {
1356 vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
1358 4*x + x_off, 4*y + y_off, 4, 4,
1359 width, height, s->linesize,
1360 s->put_pixels_tab[2]);
/* Chroma: each 4x4 chroma block covers a 2x2 group of luma blocks; its vector
 * is the sum of the four luma vectors divided by 4 with rounding. */
1365 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1366 for (y = 0; y < 2; y++) {
1367 for (x = 0; x < 2; x++) {
1368 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1369 mb->bmv[ 2*y * 4 + 2*x+1].x +
1370 mb->bmv[(2*y+1) * 4 + 2*x ].x +
1371 mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1372 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1373 mb->bmv[ 2*y * 4 + 2*x+1].y +
1374 mb->bmv[(2*y+1) * 4 + 2*x ].y +
1375 mb->bmv[(2*y+1) * 4 + 2*x+1].y;
/* The sign term (arithmetic shift by INT_BIT-1 gives -1 for negative sums)
 * makes the rounding symmetric around zero. */
1376 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1377 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
1378 if (s->profile == 3) {
1382 vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
1383 dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1384 4*x + x_off, 4*y + y_off, 4, 4,
1385 width, height, s->uvlinesize,
1386 s->put_pixels_tab[2]);
/* Remaining modes: one vp8_mc_part call per partition, with the partition's
 * vector taken from bmv[] in top-to-bottom, left-to-right order. */
1391 case VP8_SPLITMVMODE_16x8:
1392 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1393 0, 0, 16, 8, width, height, &bmv[0]);
1394 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1395 0, 8, 16, 8, width, height, &bmv[1]);
1397 case VP8_SPLITMVMODE_8x16:
1398 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1399 0, 0, 8, 16, width, height, &bmv[0]);
1400 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1401 8, 0, 8, 16, width, height, &bmv[1]);
1403 case VP8_SPLITMVMODE_8x8:
1404 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1405 0, 0, 8, 8, width, height, &bmv[0]);
1406 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1407 8, 0, 8, 8, width, height, &bmv[1]);
1408 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1409 0, 8, 8, 8, width, height, &bmv[2]);
1410 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1411 8, 8, 8, 8, width, height, &bmv[3]);
/*
 * Add the inverse-transformed residual of one macroblock to the prediction
 * already present in dst[0..2], using the coefficient blocks in td->block and
 * the per-block counts in td->non_zero_count_cache.
 * NOTE(review): the lines that advance nnz4 to the next block, the early-exit
 * tests and the chroma_idct_end label are absent from this listing.
 */
1416 static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
1417 uint8_t *dst[3], VP8Macroblock *mb)
/* Luma is handled here for every mode except MODE_I4x4. */
1421 if (mb->mode != MODE_I4x4) {
1422 uint8_t *y_dst = dst[0];
1423 for (y = 0; y < 4; y++) {
/* Four one-byte counts of this block row read as one 32-bit value. */
1424 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
/* Some count in the row is above 1: go block by block, using the DC-only
 * add for a count of exactly 1 and the full transform for larger counts. */
1426 if (nnz4&~0x01010101) {
1427 for (x = 0; x < 4; x++) {
1428 if ((uint8_t)nnz4 == 1)
1429 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
1430 else if((uint8_t)nnz4 > 1)
1431 s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
/* No count above 1 in the row: one call covers the four blocks. */
1437 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1440 y_dst += 4*s->linesize;
/* Chroma: same scheme per plane, with the four blocks laid out 2x2. */
1444 for (ch = 0; ch < 2; ch++) {
1445 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
1447 uint8_t *ch_dst = dst[1+ch];
1448 if (nnz4&~0x01010101) {
1449 for (y = 0; y < 2; y++) {
1450 for (x = 0; x < 2; x++) {
1451 if ((uint8_t)nnz4 == 1)
1452 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1453 else if((uint8_t)nnz4 > 1)
1454 s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1457 goto chroma_idct_end;
1459 ch_dst += 4*s->uvlinesize;
1462 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
1469 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
1471 int interior_limit, filter_level;
1473 if (s->segmentation.enabled) {
1474 filter_level = s->segmentation.filter_level[mb->segment];
1475 if (!s->segmentation.absolute_vals)
1476 filter_level += s->filter.level;
1478 filter_level = s->filter.level;
1480 if (s->lf_delta.enabled) {
1481 filter_level += s->lf_delta.ref[mb->ref_frame];
1482 filter_level += s->lf_delta.mode[mb->mode];
1485 filter_level = av_clip_uintp2(filter_level, 6);
1487 interior_limit = filter_level;
1488 if (s->filter.sharpness) {
1489 interior_limit >>= (s->filter.sharpness + 3) >> 2;
1490 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1492 interior_limit = FFMAX(interior_limit, 1);
1494 f->filter_level = filter_level;
1495 f->inner_limit = interior_limit;
1496 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
/*
 * Apply the normal (non-simple) loop filter to one macroblock in dst[0..2],
 * using the strength previously stored in *f.
 * Order of the visible calls: left macroblock edge, inner vertical edges,
 * top macroblock edge, inner horizontal edges; each for luma and chroma.
 * NOTE(review): the conditions guarding each group of calls (and the early
 * return, if any) are absent from this listing, as are the last entries of
 * both hev_thresh_lut rows.
 */
1499 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
1501 int mbedge_lim, bedge_lim, hev_thresh;
1502 int filter_level = f->filter_level;
1503 int inner_limit = f->inner_limit;
1504 int inner_filter = f->inner_filter;
1505 int linesize = s->linesize;
1506 int uvlinesize = s->uvlinesize;
/* High-edge-variance threshold by [keyframe flag][filter level]. */
1507 static const uint8_t hev_thresh_lut[2][64] = {
1508 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1509 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1510 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1512 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1513 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1514 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* Edge limits: inner block edges use 2*level + inner_limit, macroblock edges
 * use that value plus 4. */
1521 bedge_lim = 2*filter_level + inner_limit;
1522 mbedge_lim = bedge_lim + 4;
1524 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
/* Left macroblock edge. */
1527 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
1528 mbedge_lim, inner_limit, hev_thresh);
1529 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
1530 mbedge_lim, inner_limit, hev_thresh);
/* Inner vertical edges at columns 4, 8 and 12 (luma) and 4 (chroma). */
1534 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
1535 inner_limit, hev_thresh);
1536 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
1537 inner_limit, hev_thresh);
1538 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
1539 inner_limit, hev_thresh);
1540 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
1541 uvlinesize, bedge_lim,
1542 inner_limit, hev_thresh);
/* Top macroblock edge. */
1546 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
1547 mbedge_lim, inner_limit, hev_thresh);
1548 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
1549 mbedge_lim, inner_limit, hev_thresh);
/* Inner horizontal edges at rows 4, 8 and 12 (luma) and 4 (chroma). */
1553 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
1554 linesize, bedge_lim,
1555 inner_limit, hev_thresh);
1556 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
1557 linesize, bedge_lim,
1558 inner_limit, hev_thresh);
1559 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
1560 linesize, bedge_lim,
1561 inner_limit, hev_thresh);
1562 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
1563 dst[2] + 4 * uvlinesize,
1564 uvlinesize, bedge_lim,
1565 inner_limit, hev_thresh);
/*
 * Apply the simple loop filter to the luma plane of one macroblock (dst),
 * using the strength stored in *f. Only luma filter functions are called.
 * NOTE(review): the conditions guarding each group of calls are absent from
 * this listing.
 */
1569 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
1571 int mbedge_lim, bedge_lim;
1572 int filter_level = f->filter_level;
1573 int inner_limit = f->inner_limit;
1574 int inner_filter = f->inner_filter;
1575 int linesize = s->linesize;
/* Same edge limits as the normal filter: inner edges use 2*level +
 * inner_limit, macroblock edges add 4. */
1580 bedge_lim = 2*filter_level + inner_limit;
1581 mbedge_lim = bedge_lim + 4;
/* Left macroblock edge, then inner vertical edges at columns 4, 8, 12. */
1584 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
1586 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1587 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1588 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
/* Top macroblock edge, then inner horizontal edges at rows 4, 8, 12. */
1592 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
1594 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1595 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1596 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
/* Slack added on both sides of the allowed motion vector range; presumably
 * 16 pixels in quarter-pel units - TODO confirm. */
1600 #define MARGIN (16 << 2)
/*
 * Decode the modes (and vectors) of every macroblock of the frame in one
 * pass, in raster order, by calling decode_mb_mode() with its last argument
 * set to 1. Macroblocks are addressed through s->macroblocks_base with a row
 * pitch of mb_width+1 and a one-row, one-column border.
 * NOTE(review): the per-row and per-column updates of s->mv_min / s->mv_max
 * and the condition in front of the intra4x4_pred_mode_top reset are absent
 * from this listing.
 */
1601 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
1602 VP8Frame *prev_frame)
1604 VP8Context *s = avctx->priv_data;
1607 s->mv_min.y = -MARGIN;
1608 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1609 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
1610 VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1611 int mb_xy = mb_y*s->mb_width;
/* Left-neighbour intra 4x4 modes start each row as DC_PRED. */
1613 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1615 s->mv_min.x = -MARGIN;
1616 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1617 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
/* (mb - mb_width - 1) is the macroblock directly above in this layout. */
1619 AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
/* The previous frame's segmentation map is passed along when one exists. */
1620 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
1621 prev_frame && prev_frame->seg_map ?
1622 prev_frame->seg_map->data + mb_xy : NULL, 1);
/*
 * Row-level synchronisation helpers for sliced threading.
 *
 * Positions are packed as (mb_y << 16) | (mb_x & 0xFFFF), so a plain integer
 * comparison orders them by row first and column second.
 *
 * check_thread_pos(td, otd, x, y): if thread `otd` has not yet published a
 * position of at least (y, x), record the wanted position in td->wait_mb_pos
 * and wait on otd's condition variable under otd's mutex; wait_mb_pos is
 * reset to INT_MAX before the mutex is released.
 *
 * update_pos(td, y, x): publish td's new position in td->thread_mb_pos and,
 * when slice threading with more than one job is active and a neighbouring
 * thread is waiting for a position at or below the new one (or either
 * neighbour pointer is NULL), broadcast on td's condition variable.
 * update_pos relies on `avctx`, `num_jobs`, `next_td` and `prev_td` being in
 * scope at the expansion site.
 *
 * The last two definitions expand to nothing.
 *
 * NOTE(review): the do/while wrappers, the loop around pthread_cond_wait and
 * the preprocessor conditionals that choose between the two sets of
 * definitions are absent from this listing. The macro arguments are expanded
 * without parentheses, and thread_mb_pos is first tested outside the mutex.
 */
1632 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
1634 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);\
1635 if (otd->thread_mb_pos < tmp) {\
1636 pthread_mutex_lock(&otd->lock);\
1637 td->wait_mb_pos = tmp;\
1639 if (otd->thread_mb_pos >= tmp)\
1641 pthread_cond_wait(&otd->cond, &otd->lock);\
1643 td->wait_mb_pos = INT_MAX;\
1644 pthread_mutex_unlock(&otd->lock);\
1648 #define update_pos(td, mb_y, mb_x)\
1650 int pos = (mb_y << 16) | (mb_x & 0xFFFF);\
1651 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
1652 int is_null = (next_td == NULL) || (prev_td == NULL);\
1653 int pos_check = (is_null) ? 1 :\
1654 (next_td != td && pos >= next_td->wait_mb_pos) ||\
1655 (prev_td != td && pos >= prev_td->wait_mb_pos);\
1656 td->thread_mb_pos = pos;\
1657 if (sliced_threading && pos_check) {\
1658 pthread_mutex_lock(&td->lock);\
1659 pthread_cond_broadcast(&td->cond);\
1660 pthread_mutex_unlock(&td->lock);\
1664 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
1665 #define update_pos(td, mb_y, mb_x)
/*
 * Decode one macroblock row without loop filtering: for each macroblock,
 * decode mode/vectors (unless done up front), decode coefficients, run intra
 * or inter prediction, add the residual and store the filter strength for
 * the later filter pass. The row index is taken from the upper 16 bits of
 * td->thread_mb_pos, and progress is published with update_pos().
 * NOTE(review): many short lines (the dst[] declaration, `else` keywords,
 * several conditions, the dst pointer increments and closing braces) are
 * absent from this listing.
 */
1668 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
1669 int jobnr, int threadnr)
1671 VP8Context *s = avctx->priv_data;
1672 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
1673 int mb_y = td->thread_mb_pos>>16;
1674 int i, y, mb_x, mb_xy = mb_y*s->mb_width;
1675 int num_jobs = s->num_jobs;
1676 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
/* Coefficient partition for this row; the mask assumes the partition count
 * is a power of two - TODO confirm. */
1677 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
/* Start of this macroblock row in each of the three planes. */
1680 curframe->tf.f->data[0] + 16*mb_y*s->linesize,
1681 curframe->tf.f->data[1] + 8*mb_y*s->uvlinesize,
1682 curframe->tf.f->data[2] + 8*mb_y*s->uvlinesize
/* Neighbouring rows are handled by the adjacent jobs; the first and last row
 * use td itself as a "no neighbour" marker. */
1684 if (mb_y == 0) prev_td = td;
1685 else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1686 if (mb_y == s->mb_height-1) next_td = td;
1687 else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
/* Two macroblock array layouts: a full-frame array with border (layout 1),
 * or the compact array indexed from s->macroblocks. */
1688 if (s->mb_layout == 1)
1689 mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1691 // Make sure the previous frame has read its segmentation map,
1692 // if we re-use the same map.
1693 if (prev_frame && s->segmentation.enabled &&
1694 !s->segmentation.update_map)
1695 ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
1696 mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1697 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
1698 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1701 memset(td->left_nnz, 0, sizeof(td->left_nnz));
1702 // left edge of 129 for intra prediction
1703 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1704 for (i = 0; i < 3; i++)
1705 for (y = 0; y < 16>>!!i; y++)
1706 dst[i][y*curframe->tf.f->linesize[i]-1] = 129;
1708 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
1712 s->mv_min.x = -MARGIN;
1713 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1715 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1716 // Wait for previous thread to read mb_x+2, and reach mb_y-1.
/* The second form adds mb_width+3 to the column, which is the offset that
 * vp8_filter_mb_row() uses when it publishes filter progress. */
1717 if (prev_td != td) {
1718 if (threadnr != 0) {
1719 check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
1721 check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
1725 s->vdsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1726 s->vdsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1729 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
1730 prev_frame && prev_frame->seg_map ?
1731 prev_frame->seg_map->data + mb_xy : NULL, 0);
1733 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
1736 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);
/* Modes up to and including MODE_I4x4 are intra; everything else is inter. */
1738 if (mb->mode <= MODE_I4x4)
1739 intra_predict(s, td, dst, mb, mb_x, mb_y);
1741 inter_predict(s, td, dst, mb, mb_x, mb_y);
1743 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
1746 idct_mb(s, td, dst, mb);
1748 AV_ZERO64(td->left_nnz);
1749 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
1751 // Reset DC block predictors if they would exist if the mb had coefficients
1752 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
1753 td->left_nnz[8] = 0;
1754 s->top_nnz[mb_x][8] = 0;
1758 if (s->deblock_filter)
1759 filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);
/* With several jobs, the last thread saves the unfiltered border here; the
 * single-job case does the same inside vp8_filter_mb_row(). */
1761 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
1762 if (s->filter.simple)
1763 backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1765 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1768 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
/* NOTE(review): if this test sits inside the mb_x loop above (whose bound is
 * mb_x < s->mb_width), the first branch can never be taken - confirm. */
1776 if (mb_x == s->mb_width+1) {
1777 update_pos(td, mb_y, s->mb_width+3);
1779 update_pos(td, mb_y, mb_x);
/*
 * Loop-filter one macroblock row, using the per-macroblock strengths stored
 * in td->filter_strength by the decode pass. The row index is taken from the
 * upper 16 bits of td->thread_mb_pos. Filter progress is published with the
 * column offset by mb_width+3, which places it after every decode position
 * of the same row.
 * NOTE(review): the dst[] declaration, the dst pointer increments, `else`
 * keywords and closing braces are absent from this listing.
 */
1784 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
1785 int jobnr, int threadnr)
1787 VP8Context *s = avctx->priv_data;
1788 VP8ThreadData *td = &s->thread_data[threadnr];
1789 int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
1790 AVFrame *curframe = s->curframe->tf.f;
1792 VP8ThreadData *prev_td, *next_td;
/* Start of this macroblock row in each of the three planes. */
1794 curframe->data[0] + 16*mb_y*s->linesize,
1795 curframe->data[1] + 8*mb_y*s->uvlinesize,
1796 curframe->data[2] + 8*mb_y*s->uvlinesize
1799 if (s->mb_layout == 1)
1800 mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1802 mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
/* First and last row use td itself as a "no neighbour" marker. */
1804 if (mb_y == 0) prev_td = td;
1805 else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1806 if (mb_y == s->mb_height-1) next_td = td;
1807 else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
1809 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
1810 VP8FilterStrength *f = &td->filter_strength[mb_x];
/* Wait until the row above has been filtered past this column... */
1811 if (prev_td != td) {
1812 check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
/* ...and until the row below has been decoded past this column. */
1815 if (next_td != &s->thread_data[0]) {
1816 check_thread_pos(td, next_td, mb_x+1, mb_y+1);
/* Single job: the unfiltered border is saved here, right before filtering. */
1819 if (num_jobs == 1) {
1820 if (s->filter.simple)
1821 backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1823 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1826 if (s->filter.simple)
1827 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
1829 filter_mb(s, dst, f, mb_x, mb_y);
1834 update_pos(td, mb_y, (s->mb_width+3) + mb_x);
/*
 * Job entry point passed to avctx->execute2(): job `jobnr` processes rows
 * jobnr, jobnr + num_jobs, jobnr + 2*num_jobs, ... Each row is decoded and,
 * when the loop filter is enabled, filtered; afterwards the row is marked
 * complete and, under frame threading, reported as progress on the current
 * frame.
 * NOTE(review): the lines between the final update_pos() and the progress
 * report, and the function's return statement, are absent from this listing.
 */
1838 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
1839 int jobnr, int threadnr)
1841 VP8Context *s = avctx->priv_data;
/* The per-job state is indexed by jobnr here, whereas the two row functions
 * index s->thread_data by threadnr. */
1842 VP8ThreadData *td = &s->thread_data[jobnr];
/* Both NULL, which makes update_pos() below treat the check as satisfied. */
1843 VP8ThreadData *next_td = NULL, *prev_td = NULL;
1844 VP8Frame *curframe = s->curframe;
1845 int mb_y, num_jobs = s->num_jobs;
1846 td->thread_nr = threadnr;
1847 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
/* Redundant with the loop condition directly above. */
1848 if (mb_y >= s->mb_height) break;
/* Publish "start of row mb_y"; the row functions read the row index back
 * from this field. */
1849 td->thread_mb_pos = mb_y<<16;
1850 vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
1851 if (s->deblock_filter)
1852 vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
/* Column 0xFFFF: the highest position within this row. */
1853 update_pos(td, mb_y, INT_MAX & 0xFFFF);
1858 if (avctx->active_thread_type == FF_THREAD_FRAME)
1859 ff_thread_report_progress(&curframe->tf, mb_y, 0);
/*
 * Decode one VP8 packet.
 * Steps visible here: parse the frame header, decide whether the frame may be
 * skipped, release unreferenced frame slots and pick a free one, allocate the
 * new frame, compute the reference set for the next frame (next_framep),
 * reset per-frame prediction state, run the row jobs through execute2(), and
 * finally hand a reference to the decoded picture to the caller unless the
 * frame is marked invisible.
 * NOTE(review): the remaining parameters of the signature, all `goto`
 * statements with their labels, several `else` keywords and the return
 * statements are absent from this listing, so the error and skip paths cannot
 * be followed from here.
 */
1865 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
1868 VP8Context *s = avctx->priv_data;
1869 int ret, i, referenced, num_jobs;
1870 enum AVDiscard skip_thresh;
1871 VP8Frame *av_uninit(curframe), *prev_frame;
1873 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
1876 prev_frame = s->framep[VP56_FRAME_CURRENT];
/* The frame is "referenced" if it becomes the last, golden or altref frame. */
1878 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
1879 || s->update_altref == VP56_FRAME_CURRENT;
/* Discard level at which this frame is skipped: non-reference frames first,
 * then non-keyframes, keyframes only at AVDISCARD_ALL. */
1881 skip_thresh = !referenced ? AVDISCARD_NONREF :
1882 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
1884 if (avctx->skip_frame >= skip_thresh) {
/* Skipped frame: the reference set is carried over unchanged (4 pointers). */
1886 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
1889 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
/* NOTE(review): the literal 5 in the two loops below presumably equals
 * FF_ARRAY_ELEMS(s->frames), which other functions in this file use. */
1891 // release no longer referenced frames
1892 for (i = 0; i < 5; i++)
1893 if (s->frames[i].tf.f->data[0] &&
1894 &s->frames[i] != prev_frame &&
1895 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1896 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1897 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1898 vp8_release_frame(s, &s->frames[i]);
1900 // find a free buffer
1901 for (i = 0; i < 5; i++)
1902 if (&s->frames[i] != prev_frame &&
1903 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1904 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1905 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1906 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
1910 av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
1913 if (curframe->tf.f->data[0])
1914 vp8_release_frame(s, curframe);
1916 // Given that arithmetic probabilities are updated every frame, it's quite likely
1917 // that the values we have on a random interframe are complete junk if we didn't
1918 // start decode on a keyframe. So just don't display anything rather than junk.
1919 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1920 !s->framep[VP56_FRAME_GOLDEN] ||
1921 !s->framep[VP56_FRAME_GOLDEN2])) {
1922 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1923 ret = AVERROR_INVALIDDATA;
1927 curframe->tf.f->key_frame = s->keyframe;
1928 curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1929 if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
/* Reference set for the next frame: each slot either takes the frame named
 * by the corresponding update field or keeps its current frame. */
1932 // check if golden and altref are swapped
1933 if (s->update_altref != VP56_FRAME_NONE) {
1934 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
1936 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
1938 if (s->update_golden != VP56_FRAME_NONE) {
1939 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
1941 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
1943 if (s->update_last) {
1944 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
1946 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
1948 s->next_framep[VP56_FRAME_CURRENT] = curframe;
/* Called once next_framep is final; vp8_decode_update_thread_context() reads
 * next_framep from the source context. */
1950 ff_thread_finish_setup(avctx);
1952 s->linesize = curframe->tf.f->linesize[0];
1953 s->uvlinesize = curframe->tf.f->linesize[1];
1955 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1956 /* Zero macroblock structures for top/top-left prediction from outside the frame. */
1958 memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
1959 if (!s->mb_layout && s->keyframe)
1960 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
1962 // top edge of 127 for intra prediction
1963 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1964 s->top_border[0][15] = s->top_border[0][23] = 127;
1965 s->top_border[0][31] = 127;
1966 memset(s->top_border[1], 127, s->mb_width*sizeof(*s->top_border));
1968 memset(s->ref_count, 0, sizeof(s->ref_count));
/* Layout 1: all macroblock modes are decoded up front, before the row jobs. */
1971 if (s->mb_layout == 1) {
1972 // Make sure the previous frame has read its segmentation map,
1973 // if we re-use the same map.
1974 if (prev_frame && s->segmentation.enabled &&
1975 !s->segmentation.update_map)
1976 ff_thread_await_progress(&prev_frame->tf, 1, 0);
1977 vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
/* NOTE(review): the statement governed by this `if` (and the matching else)
 * is absent from this listing. */
1980 if (avctx->active_thread_type == FF_THREAD_FRAME)
1983 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
1984 s->num_jobs = num_jobs;
1985 s->curframe = curframe;
1986 s->prev_frame = prev_frame;
1987 s->mv_min.y = -MARGIN;
1988 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
/* Reset the per-thread positions: nothing decoded yet, nobody waiting. */
1989 for (i = 0; i < MAX_THREADS; i++) {
1990 s->thread_data[i].thread_mb_pos = 0;
1991 s->thread_data[i].wait_mb_pos = INT_MAX;
1993 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);
/* Mark the whole frame as done and make the new reference set current. */
1995 ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
1996 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
1999 // if future frames don't use the updated probabilities,
2000 // reset them to the values we saved
2001 if (!s->update_probabilities)
2002 s->prob[0] = s->prob[1];
/* Only visible frames are returned to the caller. */
2004 if (!s->invisible) {
2005 if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
/* Restores next_framep from the current reference set; presumably part of
 * the error path - TODO confirm, the label is not visible. */
2012 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2016 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2018 VP8Context *s = avctx->priv_data;
2021 vp8_decode_flush_impl(avctx, 1);
2022 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2023 av_frame_free(&s->frames[i].tf.f);
2028 static av_cold int vp8_init_frames(VP8Context *s)
2031 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2032 s->frames[i].tf.f = av_frame_alloc();
2033 if (!s->frames[i].tf.f)
2034 return AVERROR(ENOMEM);
/*
 * Decoder init callback: selects the YUV 4:2:0 pixel format, requests
 * progress tracking from the threading layer, initialises the DSP function
 * tables and allocates the frame slots. If frame allocation fails, the
 * context is torn down again with ff_vp8_decode_free().
 * NOTE(review): local declarations, the assignment of s->avctx (if any) and
 * the return statements are absent from this listing.
 */
2039 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2041 VP8Context *s = avctx->priv_data;
2045 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2046 avctx->internal->allocate_progress = 1;
2048 ff_videodsp_init(&s->vdsp, 8);
2049 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2050 ff_vp8dsp_init(&s->vp8dsp);
2052 if ((ret = vp8_init_frames(s)) < 0) {
2053 ff_vp8_decode_free(avctx);
/*
 * init_thread_copy callback: allocates the frame slots of a per-thread
 * decoder context and frees the context again if that fails.
 * NOTE(review): local declarations, any other per-copy setup and the return
 * statements are absent from this listing.
 */
2060 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2062 VP8Context *s = avctx->priv_data;
2067 if ((ret = vp8_init_frames(s)) < 0) {
2068 ff_vp8_decode_free(avctx);
/*
 * Translate a frame pointer that points into s_src->frames[] into the
 * pointer to the same slot of s->frames[]; a NULL pointer stays NULL.
 * Relies on `s` and `s_src` being in scope at the expansion site.
 * The argument and the whole expansion are parenthesised so the macro is safe
 * with compound arguments and inside larger expressions. `pic` is still
 * evaluated twice, so it must not have side effects.
 */
#define REBASE(pic) \
    ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
/*
 * update_thread_context callback: copy the state the next frame depends on
 * from the source thread's context (src) into this one (dst).
 * Copied here: macroblock dimensions (when they changed and buffers already
 * exist), entropy probabilities, segmentation and loop-filter delta
 * settings, sign biases, references to every allocated frame, and the
 * reference set, with pointers translated into this context's frames[].
 * NOTE(review): local declarations, the statement that precedes the
 * dimension update inside the first `if`, the error check after
 * vp8_ref_frame() and the return statement are absent from this listing.
 */
2078 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
2080 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2083 if (s->macroblocks_base &&
2084 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2086 s->mb_width = s_src->mb_width;
2087 s->mb_height = s_src->mb_height;
/* Takes prob[0] if the source frame updates the probabilities, otherwise
 * prob[1]. */
2090 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2091 s->segmentation = s_src->segmentation;
2092 s->lf_delta = s_src->lf_delta;
2093 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
/* Take a new reference on every frame the source context has allocated. */
2095 for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2096 if (s_src->frames[i].tf.f->data[0]) {
2097 int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
/* The source's next_framep becomes this context's current reference set. */
2103 s->framep[0] = REBASE(s_src->next_framep[0]);
2104 s->framep[1] = REBASE(s_src->next_framep[1]);
2105 s->framep[2] = REBASE(s_src->next_framep[2]);
2106 s->framep[3] = REBASE(s_src->next_framep[3]);
/*
 * Codec descriptor for the VP8 video decoder: wires the init, close, decode
 * and flush callbacks defined in this file and advertises direct rendering
 * plus frame and slice threading. The two threading callbacks are wrapped in
 * ONLY_IF_THREADS_ENABLED().
 * NOTE(review): the .name field and the closing of the initializer are absent
 * from this listing.
 */
2111 AVCodec ff_vp8_decoder = {
2113 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2114 .type = AVMEDIA_TYPE_VIDEO,
2115 .id = AV_CODEC_ID_VP8,
2116 .priv_data_size = sizeof(VP8Context),
2117 .init = ff_vp8_decode_init,
2118 .close = ff_vp8_decode_free,
2119 .decode = ff_vp8_decode_frame,
2120 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2121 .flush = vp8_decode_flush,
2122 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2123 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),