2 * VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Jason Garrett-Glaser
7 * Copyright (C) 2012 Daniel Kang
9 * This file is part of Libav.
11 * Libav is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * Libav is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with Libav; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/imgutils.h"
31 #include "rectangle.h"
38 static void free_buffers(VP8Context *s)
42 for (i = 0; i < MAX_THREADS; i++) {
43 av_freep(&s->thread_data[i].filter_strength);
44 av_freep(&s->thread_data[i].edge_emu_buffer);
46 av_freep(&s->thread_data);
47 av_freep(&s->macroblocks_base);
48 av_freep(&s->intra4x4_pred_mode_top);
49 av_freep(&s->top_nnz);
50 av_freep(&s->top_border);
52 s->macroblocks = NULL;
55 static int vp8_alloc_frame(VP8Context *s, AVFrame *f)
58 if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0)
60 if (s->num_maps_to_be_freed && !s->maps_are_invalid) {
61 f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed];
62 } else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) {
63 ff_thread_release_buffer(s->avctx, f);
64 return AVERROR(ENOMEM);
69 static void vp8_release_frame(VP8Context *s, AVFrame *f, int prefer_delayed_free, int can_direct_free)
71 if (f->ref_index[0]) {
72 if (prefer_delayed_free) {
73 /* Upon a size change, we want to free the maps but other threads may still
74 * be using them, so queue them. Upon a seek, all threads are inactive so
75 * we want to cache one to prevent re-allocation in the next decoding
76 * iteration, but the rest we can free directly. */
77 int max_queued_maps = can_direct_free ? 1 : FF_ARRAY_ELEMS(s->segmentation_maps);
78 if (s->num_maps_to_be_freed < max_queued_maps) {
79 s->segmentation_maps[s->num_maps_to_be_freed++] = f->ref_index[0];
80 } else if (can_direct_free) /* vp8_decode_flush(), but our queue is full */ {
81 av_free(f->ref_index[0]);
82 } /* else: MEMLEAK (should never happen, but better that than crash) */
83 f->ref_index[0] = NULL;
84 } else /* vp8_decode_free() */ {
85 av_free(f->ref_index[0]);
88 ff_thread_release_buffer(s->avctx, f);
91 static void vp8_decode_flush_impl(AVCodecContext *avctx,
92 int prefer_delayed_free, int can_direct_free, int free_mem)
94 VP8Context *s = avctx->priv_data;
97 if (!avctx->internal->is_copy) {
98 for (i = 0; i < 5; i++)
99 if (s->frames[i].data[0])
100 vp8_release_frame(s, &s->frames[i], prefer_delayed_free, can_direct_free);
102 memset(s->framep, 0, sizeof(s->framep));
106 s->maps_are_invalid = 1;
110 static void vp8_decode_flush(AVCodecContext *avctx)
112 vp8_decode_flush_impl(avctx, 1, 1, 0);
115 static int update_dimensions(VP8Context *s, int width, int height)
117 AVCodecContext *avctx = s->avctx;
120 if (width != s->avctx->width ||
121 height != s->avctx->height) {
122 if (av_image_check_size(width, height, 0, s->avctx))
123 return AVERROR_INVALIDDATA;
125 vp8_decode_flush_impl(s->avctx, 1, 0, 1);
127 avcodec_set_dimensions(s->avctx, width, height);
130 s->mb_width = (s->avctx->coded_width +15) / 16;
131 s->mb_height = (s->avctx->coded_height+15) / 16;
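    /* Note on mb_layout (a summary of the allocation below): with slice
     * threading and more than one coefficient partition/thread, mode info for
     * the whole frame is parsed up front into a full (mb_width+2)x(mb_height+2)
     * grid of macroblocks; otherwise only a small strip of rows is kept and
     * modes are decoded per macroblock row, which additionally needs the
     * separate intra4x4_pred_mode_top array. */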
133 s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
134 if (!s->mb_layout) { // Frame threading and one thread
135 s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
136 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
138 else // Sliced threading
139 s->macroblocks_base = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
140 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
141 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
142 s->thread_data = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));
144 for (i = 0; i < MAX_THREADS; i++) {
145 s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
147 pthread_mutex_init(&s->thread_data[i].lock, NULL);
148 pthread_cond_init(&s->thread_data[i].cond, NULL);
152 if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
153 (!s->intra4x4_pred_mode_top && !s->mb_layout))
154 return AVERROR(ENOMEM);
156 s->macroblocks = s->macroblocks_base + 1;
161 static void parse_segment_info(VP8Context *s)
163 VP56RangeCoder *c = &s->c;
166 s->segmentation.update_map = vp8_rac_get(c);
168 if (vp8_rac_get(c)) { // update segment feature data
169 s->segmentation.absolute_vals = vp8_rac_get(c);
171 for (i = 0; i < 4; i++)
172 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
174 for (i = 0; i < 4; i++)
175 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
177 if (s->segmentation.update_map)
178 for (i = 0; i < 3; i++)
179 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
182 static void update_lf_deltas(VP8Context *s)
184 VP56RangeCoder *c = &s->c;
187 for (i = 0; i < 4; i++) {
188 if (vp8_rac_get(c)) {
189 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
192 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
196 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
197 if (vp8_rac_get(c)) {
198 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
201 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
206 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
208 const uint8_t *sizes = buf;
211 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
213 buf += 3*(s->num_coeff_partitions-1);
214 buf_size -= 3*(s->num_coeff_partitions-1);
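    /* The frame carries 1, 2, 4 or 8 coefficient partitions after the header
     * partition; the sizes of all but the last one are stored up front as
     * 3-byte little-endian values, and the last partition simply takes
     * whatever data remains in the packet. */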
218 for (i = 0; i < s->num_coeff_partitions-1; i++) {
219 int size = AV_RL24(sizes + 3*i);
220 if (buf_size - size < 0)
223 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
227 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
232 static void get_quants(VP8Context *s)
234 VP56RangeCoder *c = &s->c;
237 int yac_qi = vp8_rac_get_uint(c, 7);
238 int ydc_delta = vp8_rac_get_sint(c, 4);
239 int y2dc_delta = vp8_rac_get_sint(c, 4);
240 int y2ac_delta = vp8_rac_get_sint(c, 4);
241 int uvdc_delta = vp8_rac_get_sint(c, 4);
242 int uvac_delta = vp8_rac_get_sint(c, 4);
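    /* Build one set of dequant factors per segment: the per-frame deltas read
     * above are added to each segment's base quantizer index (absolute, or
     * relative to yac_qi), the result indexes the DC/AC lookup tables, and the
     * second-order luma (Y2) factors get the extra scaling and clamping used
     * by the reference decoder. */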
244 for (i = 0; i < 4; i++) {
245 if (s->segmentation.enabled) {
246 base_qi = s->segmentation.base_quant[i];
247 if (!s->segmentation.absolute_vals)
252 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
253 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)];
254 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
255 s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100;
256 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
257 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
259 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
260 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does.
 *
 * Intra frames update all 3 references
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
 * If the update (golden|altref) flag is set, it's updated with the current frame
 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 * 0: no update
 * 1: VP56_FRAME_PREVIOUS
 * 2: update golden with altref, or update altref with golden
 */
277 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
279 VP56RangeCoder *c = &s->c;
282 return VP56_FRAME_CURRENT;
284 switch (vp8_rac_get_uint(c, 2)) {
286 return VP56_FRAME_PREVIOUS;
288 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
290 return VP56_FRAME_NONE;
293 static void update_refs(VP8Context *s)
295 VP56RangeCoder *c = &s->c;
297 int update_golden = vp8_rac_get(c);
298 int update_altref = vp8_rac_get(c);
300 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
301 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
304 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
306 VP56RangeCoder *c = &s->c;
307 int header_size, hscale, vscale, i, j, k, l, m, ret;
308 int width = s->avctx->width;
309 int height = s->avctx->height;
311 s->keyframe = !(buf[0] & 1);
312 s->profile = (buf[0]>>1) & 7;
313 s->invisible = !(buf[0] & 0x10);
314 header_size = AV_RL24(buf) >> 5;
319 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
322 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
323 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
324 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
326 if (header_size > buf_size - 7*s->keyframe) {
327 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
328 return AVERROR_INVALIDDATA;
332 if (AV_RL24(buf) != 0x2a019d) {
333 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
334 return AVERROR_INVALIDDATA;
336 width = AV_RL16(buf+3) & 0x3fff;
337 height = AV_RL16(buf+5) & 0x3fff;
338 hscale = buf[4] >> 6;
339 vscale = buf[6] >> 6;
343 if (hscale || vscale)
344 av_log_missing_feature(s->avctx, "Upscaling", 1);
346 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
347 for (i = 0; i < 4; i++)
348 for (j = 0; j < 16; j++)
349 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
350 sizeof(s->prob->token[i][j]));
351 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
352 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
353 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
354 memset(&s->segmentation, 0, sizeof(s->segmentation));
357 ff_vp56_init_range_decoder(c, buf, header_size);
359 buf_size -= header_size;
363 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
364 vp8_rac_get(c); // whether we can skip clamping in dsp functions
367 if ((s->segmentation.enabled = vp8_rac_get(c)))
368 parse_segment_info(s);
370 s->segmentation.update_map = 0; // FIXME: move this to some init function?
372 s->filter.simple = vp8_rac_get(c);
373 s->filter.level = vp8_rac_get_uint(c, 6);
374 s->filter.sharpness = vp8_rac_get_uint(c, 3);
376 if ((s->lf_delta.enabled = vp8_rac_get(c)))
380 if (setup_partitions(s, buf, buf_size)) {
381 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
382 return AVERROR_INVALIDDATA;
385 if (!s->macroblocks_base || /* first frame */
386 width != s->avctx->width || height != s->avctx->height) {
387 if ((ret = update_dimensions(s, width, height)) < 0)
395 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
396 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
399 // if we aren't saving this frame's probabilities for future frames,
400 // make a copy of the current probabilities
401 if (!(s->update_probabilities = vp8_rac_get(c)))
402 s->prob[1] = s->prob[0];
404 s->update_last = s->keyframe || vp8_rac_get(c);
406 for (i = 0; i < 4; i++)
407 for (j = 0; j < 8; j++)
408 for (k = 0; k < 3; k++)
409 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
410 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
411 int prob = vp8_rac_get_uint(c, 8);
412 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
413 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
416 if ((s->mbskip_enabled = vp8_rac_get(c)))
417 s->prob->mbskip = vp8_rac_get_uint(c, 8);
420 s->prob->intra = vp8_rac_get_uint(c, 8);
421 s->prob->last = vp8_rac_get_uint(c, 8);
422 s->prob->golden = vp8_rac_get_uint(c, 8);
425 for (i = 0; i < 4; i++)
426 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
428 for (i = 0; i < 3; i++)
429 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
431 // 17.2 MV probability update
432 for (i = 0; i < 2; i++)
433 for (j = 0; j < 19; j++)
434 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
435 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
441 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
443 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
444 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
448 * Motion vector coding, 17.1.
450 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
454 if (vp56_rac_get_prob_branchy(c, p[0])) {
457 for (i = 0; i < 3; i++)
458 x += vp56_rac_get_prob(c, p[9 + i]) << i;
459 for (i = 9; i > 3; i--)
460 x += vp56_rac_get_prob(c, p[9 + i]) << i;
461 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
465 const uint8_t *ps = p+2;
466 bit = vp56_rac_get_prob(c, *ps);
469 bit = vp56_rac_get_prob(c, *ps);
472 x += vp56_rac_get_prob(c, *ps);
475 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
478 static av_always_inline
479 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
482 return vp8_submv_prob[4-!!left];
484 return vp8_submv_prob[2];
485 return vp8_submv_prob[1-!!left];
489 * Split motion vector prediction, 16.4.
490 * @returns the number of motion vectors parsed (2, 4 or 16)
492 static av_always_inline
493 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
497 VP8Macroblock *top_mb;
498 VP8Macroblock *left_mb = &mb[-1];
499 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
501 *mbsplits_cur, *firstidx;
503 VP56mv *left_mv = left_mb->bmv;
504 VP56mv *cur_mv = mb->bmv;
506 if (!layout) // layout is inlined, s->mb_layout is not
509 top_mb = &mb[-s->mb_width-1];
510 mbsplits_top = vp8_mbsplits[top_mb->partitioning];
511 top_mv = top_mb->bmv;
513 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
514 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
515 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
517 part_idx = VP8_SPLITMVMODE_8x8;
520 part_idx = VP8_SPLITMVMODE_4x4;
523 num = vp8_mbsplit_count[part_idx];
524 mbsplits_cur = vp8_mbsplits[part_idx],
525 firstidx = vp8_mbfirstidx[part_idx];
526 mb->partitioning = part_idx;
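    /* One sub-MV is coded per partition; vp8_mbfirstidx[] gives the first 4x4
     * block of each partition (used below as k for the left/above prediction
     * context) and vp8_mbsplits[] maps every 4x4 block back to its partition. */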
528 for (n = 0; n < num; n++) {
530 uint32_t left, above;
531 const uint8_t *submv_prob;
534 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
536 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
538 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
540 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
542 submv_prob = get_submv_prob(left, above);
544 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
545 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
546 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
547 mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
548 mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
550 AV_ZERO32(&mb->bmv[n]);
553 AV_WN32A(&mb->bmv[n], above);
556 AV_WN32A(&mb->bmv[n], left);
563 static av_always_inline
564 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
566 VP8Macroblock *mb_edge[3] = { 0 /* top */,
569 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
570 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
572 int cur_sign_bias = s->sign_bias[mb->ref_frame];
573 int8_t *sign_bias = s->sign_bias;
575 uint8_t cnt[4] = { 0 };
576 VP56RangeCoder *c = &s->c;
578 if (!layout) { // layout is inlined (s->mb_layout is not)
583 mb_edge[0] = mb - s->mb_width-1;
584 mb_edge[2] = mb - s->mb_width-2;
587 AV_ZERO32(&near_mv[0]);
588 AV_ZERO32(&near_mv[1]);
589 AV_ZERO32(&near_mv[2]);
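    /* Gather candidate MVs from the top, left and top-left neighbours:
     * distinct vectors are collected in near_mv[] and vote-counted in cnt[],
     * with top/left weighted 2 and top-left weighted 1; the counts later
     * select rows of vp8_mode_contexts for the zero/nearest/near/new decisions. */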
591 /* Process MB on top, left and top-left */
592 #define MV_EDGE_CHECK(n)\
594 VP8Macroblock *edge = mb_edge[n];\
595 int edge_ref = edge->ref_frame;\
596 if (edge_ref != VP56_FRAME_CURRENT) {\
597 uint32_t mv = AV_RN32A(&edge->mv);\
599 if (cur_sign_bias != sign_bias[edge_ref]) {\
                /* SWAR negate of the values in mv: complement, then add 1 to */\
                /* each 16-bit half without the carry crossing halves. */\
                mv = ~mv;\
                mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
604 if (!n || mv != AV_RN32A(&near_mv[idx]))\
605 AV_WN32A(&near_mv[++idx], mv);\
606 cnt[idx] += 1 + (n != 2);\
608 cnt[CNT_ZERO] += 1 + (n != 2);\
616 mb->partitioning = VP8_SPLITMVMODE_NONE;
617 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
618 mb->mode = VP8_MVMODE_MV;
620 /* If we have three distinct MVs, merge first and last if they're the same */
621 if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
622 cnt[CNT_NEAREST] += 1;
624 /* Swap near and nearest if necessary */
625 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
626 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
627 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
630 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
631 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
633 /* Choose the best mv out of 0,0 and the nearest mv */
634 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
635 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
636 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
637 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
639 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
640 mb->mode = VP8_MVMODE_SPLIT;
641 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
643 mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
644 mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
648 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
652 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
656 mb->mode = VP8_MVMODE_ZERO;
662 static av_always_inline
663 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
664 int mb_x, int keyframe, int layout)
666 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
669 VP8Macroblock *mb_top = mb - s->mb_width - 1;
670 memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
675 uint8_t* const left = s->intra4x4_pred_mode_left;
677 top = mb->intra4x4_pred_mode_top;
679 top = s->intra4x4_pred_mode_top + 4 * mb_x;
680 for (y = 0; y < 4; y++) {
681 for (x = 0; x < 4; x++) {
683 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
684 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
685 left[y] = top[x] = *intra4x4;
691 for (i = 0; i < 16; i++)
692 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
696 static av_always_inline
697 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
698 uint8_t *segment, uint8_t *ref, int layout)
700 VP56RangeCoder *c = &s->c;
702 if (s->segmentation.update_map)
703 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
704 else if (s->segmentation.enabled)
705 *segment = ref ? *ref : *segment;
706 mb->segment = *segment;
708 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
711 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
713 if (mb->mode == MODE_I4x4) {
714 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
716 const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
717 if (s->mb_layout == 1)
718 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
720 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
721 AV_WN32A( s->intra4x4_pred_mode_left, modes);
724 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
725 mb->ref_frame = VP56_FRAME_CURRENT;
726 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
728 if (vp56_rac_get_prob_branchy(c, s->prob->last))
729 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
730 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
732 mb->ref_frame = VP56_FRAME_PREVIOUS;
733 s->ref_count[mb->ref_frame-1]++;
735 // motion vectors, 16.3
736 decode_mvs(s, mb, mb_x, mb_y, layout);
739 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
741 if (mb->mode == MODE_I4x4)
742 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
744 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
745 mb->ref_frame = VP56_FRAME_CURRENT;
746 mb->partitioning = VP8_SPLITMVMODE_NONE;
747 AV_ZERO32(&mb->bmv[0]);
751 #ifndef decode_block_coeffs_internal
753 * @param c arithmetic bitstream reader context
754 * @param block destination for block coefficients
755 * @param probs probabilities to use when reading trees from the bitstream
756 * @param i initial coeff index, 0 unless a separate DC block is coded
757 * @param qmul array holding the dc/ac dequant factor at position 0/1
758 * @return 0 if no coeffs were decoded
759 * otherwise, the index of the last coeff decoded plus one
761 static int decode_block_coeffs_internal(VP56RangeCoder *r, DCTELEM block[16],
762 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
763 int i, uint8_t *token_prob, int16_t qmul[2])
765 VP56RangeCoder c = *r;
769 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
773 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
775 break; // invalid input; blocks should end with EOB
776 token_prob = probs[i][0];
780 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
782 token_prob = probs[i+1][1];
784 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
785 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
787 coeff += vp56_rac_get_prob(&c, token_prob[5]);
791 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
792 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
793 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
796 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
797 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
799 } else { // DCT_CAT3 and up
800 int a = vp56_rac_get_prob(&c, token_prob[8]);
801 int b = vp56_rac_get_prob(&c, token_prob[9+a]);
802 int cat = (a<<1) + b;
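                /* cat 0..3 correspond to DCT_CAT3..DCT_CAT6, i.e. base values
                 * 3 + (8<<cat) = 11, 19, 35 and 67, followed by that
                 * category's extra probability-coded magnitude bits (up to 11
                 * bits for DCT_CAT6). */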
803 coeff = 3 + (8<<cat);
804 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
807 token_prob = probs[i+1][2];
809 block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
818 * @param c arithmetic bitstream reader context
819 * @param block destination for block coefficients
820 * @param probs probabilities to use when reading trees from the bitstream
821 * @param i initial coeff index, 0 unless a separate DC block is coded
822 * @param zero_nhood the initial prediction context for number of surrounding
823 * all-zero blocks (only left/top, so 0-2)
824 * @param qmul array holding the dc/ac dequant factor at position 0/1
825 * @return 0 if no coeffs were decoded
826 * otherwise, the index of the last coeff decoded plus one
828 static av_always_inline
829 int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
830 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
831 int i, int zero_nhood, int16_t qmul[2])
833 uint8_t *token_prob = probs[i][zero_nhood];
834 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
836 return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
839 static av_always_inline
840 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
841 uint8_t t_nnz[9], uint8_t l_nnz[9])
843 int i, x, y, luma_start = 0, luma_ctx = 3;
844 int nnz_pred, nnz, nnz_total = 0;
845 int segment = mb->segment;
848 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
849 nnz_pred = t_nnz[8] + l_nnz[8];
851 // decode DC values and do hadamard
852 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
853 s->qmat[segment].luma_dc_qmul);
854 l_nnz[8] = t_nnz[8] = !!nnz;
859 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
861 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
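        /* For macroblocks that are not I4x4 or split-MV, the 16 luma DC
         * coefficients come from a separate Y2 block, inverse Walsh-Hadamard
         * transformed here; the per-block luma decode below then starts at
         * coefficient 1 and uses the token context for luma blocks whose DC
         * is coded separately. */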
868 for (y = 0; y < 4; y++)
869 for (x = 0; x < 4; x++) {
870 nnz_pred = l_nnz[y] + t_nnz[x];
871 nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
872 nnz_pred, s->qmat[segment].luma_qmul);
873 // nnz+block_dc may be one more than the actual last index, but we don't care
874 td->non_zero_count_cache[y][x] = nnz + block_dc;
875 t_nnz[x] = l_nnz[y] = !!nnz;
880 // TODO: what to do about dimensions? 2nd dim for luma is x,
881 // but for chroma it's (y<<1)|x
882 for (i = 4; i < 6; i++)
883 for (y = 0; y < 2; y++)
884 for (x = 0; x < 2; x++) {
885 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
886 nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
887 nnz_pred, s->qmat[segment].chroma_qmul);
888 td->non_zero_count_cache[i][(y<<1)+x] = nnz;
889 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
893 // if there were no coded coeffs despite the macroblock not being marked skip,
894 // we MUST not do the inner loop filter and should not do IDCT
895 // Since skip isn't used for bitstream prediction, just manually set it.
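/* Save the bottom edge of a reconstructed macroblock row into top_border
 * before the loop filter can modify it; xchg_mb_border() later swaps it back
 * in as the top/top-left prediction context for the macroblock row below. */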
900 static av_always_inline
901 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
902 int linesize, int uvlinesize, int simple)
904 AV_COPY128(top_border, src_y + 15*linesize);
906 AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
907 AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
911 static av_always_inline
912 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
913 int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
914 int simple, int xchg)
916 uint8_t *top_border_m1 = top_border-32; // for TL prediction
918 src_cb -= uvlinesize;
919 src_cr -= uvlinesize;
921 #define XCHG(a,b,xchg) do { \
922 if (xchg) AV_SWAP64(b,a); \
923 else AV_COPY64(b,a); \
926 XCHG(top_border_m1+8, src_y-8, xchg);
927 XCHG(top_border, src_y, xchg);
928 XCHG(top_border+8, src_y+8, 1);
929 if (mb_x < mb_width-1)
930 XCHG(top_border+32, src_y+16, 1);
932 // only copy chroma for normal loop filter
933 // or to initialize the top row to 127
934 if (!simple || !mb_y) {
935 XCHG(top_border_m1+16, src_cb-8, xchg);
936 XCHG(top_border_m1+24, src_cr-8, xchg);
937 XCHG(top_border+16, src_cb, 1);
938 XCHG(top_border+24, src_cr, 1);
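/* With CODEC_FLAG_EMU_EDGE there is no allocated picture border, so prediction
 * modes that would read outside the frame are remapped: a missing top edge
 * behaves as if filled with 127 and a missing left edge as if filled with 129
 * (matching the 127/129 edge initialisation used when a real border exists),
 * while plain DC prediction falls back to the TOP_DC/LEFT_DC/DC_128 variants. */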
942 static av_always_inline
943 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
946 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
948 return mb_y ? mode : LEFT_DC_PRED8x8;
952 static av_always_inline
953 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
956 return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
958 return mb_y ? mode : HOR_PRED8x8;
962 static av_always_inline
963 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
965 if (mode == DC_PRED8x8) {
966 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
972 static av_always_inline
973 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
977 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
979 return !mb_y ? DC_127_PRED8x8 : mode;
981 return !mb_x ? DC_129_PRED8x8 : mode;
982 case PLANE_PRED8x8 /*TM*/:
983 return check_tm_pred8x8_mode(mode, mb_x, mb_y);
988 static av_always_inline
989 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
992 return mb_y ? VERT_VP8_PRED : DC_129_PRED;
994 return mb_y ? mode : HOR_VP8_PRED;
998 static av_always_inline
999 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
1003 if (!mb_x && mb_y) {
1008 case DIAG_DOWN_LEFT_PRED:
1009 case VERT_LEFT_PRED:
1010 return !mb_y ? DC_127_PRED : mode;
1018 return !mb_x ? DC_129_PRED : mode;
1020 return check_tm_pred4x4_mode(mode, mb_x, mb_y);
1021 case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
1022 case DIAG_DOWN_RIGHT_PRED:
1023 case VERT_RIGHT_PRED:
1032 static av_always_inline
1033 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1034 VP8Macroblock *mb, int mb_x, int mb_y)
1036 AVCodecContext *avctx = s->avctx;
1037 int x, y, mode, nnz;
1040 // for the first row, we need to run xchg_mb_border to init the top edge to 127
1041 // otherwise, skip it if we aren't going to deblock
1042 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1043 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1044 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1045 s->filter.simple, 1);
1047 if (mb->mode < MODE_I4x4) {
1048 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
1049 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
1051 mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
1053 s->hpc.pred16x16[mode](dst[0], s->linesize);
1055 uint8_t *ptr = dst[0];
1056 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1057 uint8_t tr_top[4] = { 127, 127, 127, 127 };
        // all blocks on the right edge of the macroblock use the bottom edge of
        // the top macroblock for their topright edge
1061 uint8_t *tr_right = ptr - s->linesize + 16;
1063 // if we're on the right edge of the frame, said edge is extended
1064 // from the top macroblock
1065 if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
1066 mb_x == s->mb_width-1) {
1067 tr = tr_right[-1]*0x01010101u;
1068 tr_right = (uint8_t *)&tr;
1072 AV_ZERO128(td->non_zero_count_cache);
1074 for (y = 0; y < 4; y++) {
1075 uint8_t *topright = ptr + 4 - s->linesize;
1076 for (x = 0; x < 4; x++) {
1077 int copy = 0, linesize = s->linesize;
1078 uint8_t *dst = ptr+4*x;
1079 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
1081 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
1084 topright = tr_right;
1086 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
                    mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
1089 dst = copy_dst + 12;
1093 AV_WN32A(copy_dst+4, 127U * 0x01010101U);
1095 AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
1099 copy_dst[3] = ptr[4*x-s->linesize-1];
1106 copy_dst[35] = 129U;
1108 copy_dst[11] = ptr[4*x -1];
1109 copy_dst[19] = ptr[4*x+s->linesize -1];
1110 copy_dst[27] = ptr[4*x+s->linesize*2-1];
1111 copy_dst[35] = ptr[4*x+s->linesize*3-1];
1117 s->hpc.pred4x4[mode](dst, topright, linesize);
1119 AV_COPY32(ptr+4*x , copy_dst+12);
1120 AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
1121 AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
1122 AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
1125 nnz = td->non_zero_count_cache[y][x];
1128 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
1130 s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
1135 ptr += 4*s->linesize;
1140 if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1141 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
1143 mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
1145 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1146 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1148 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1149 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1150 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1151 s->filter.simple, 0);
1154 static const uint8_t subpel_idx[3][8] = {
1155 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1156 // also function pointer index
1157 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1158 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
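/* Per subpel position (mv & 7): even positions use the full six-tap filter and
 * need 2 extra pixels on the left and 3 on the right, while odd positions use
 * the filters whose outer taps are zero and only need 1 and 2. Row 0 doubles
 * as the index into the put_pixels function tables. */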
1164 * @param s VP8 decoding context
1165 * @param dst target buffer for block data at block position
1166 * @param ref reference picture buffer at origin (0, 0)
1167 * @param mv motion vector (relative to block position) to get pixel data from
1168 * @param x_off horizontal position of block from origin (0, 0)
1169 * @param y_off vertical position of block from origin (0, 0)
1170 * @param block_w width of block (16, 8 or 4)
1171 * @param block_h height of block (always same as block_w)
1172 * @param width width of src/dst plane data
1173 * @param height height of src/dst plane data
1174 * @param linesize size of a single line of plane data, including padding
1175 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1177 static av_always_inline
1178 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1179 AVFrame *ref, const VP56mv *mv,
1180 int x_off, int y_off, int block_w, int block_h,
1181 int width, int height, int linesize,
1182 vp8_mc_func mc_func[3][3])
1184 uint8_t *src = ref->data[0];
1188 int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1189 int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
1191 x_off += mv->x >> 2;
1192 y_off += mv->y >> 2;
1195 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1196 src += y_off * linesize + x_off;
1197 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1198 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1199 s->dsp.emulated_edge_mc(td->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
1200 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1201 x_off - mx_idx, y_off - my_idx, width, height);
1202 src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1204 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
1206 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1207 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
1212 * chroma MC function
1214 * @param s VP8 decoding context
1215 * @param dst1 target buffer for block data at block position (U plane)
1216 * @param dst2 target buffer for block data at block position (V plane)
1217 * @param ref reference picture buffer at origin (0, 0)
1218 * @param mv motion vector (relative to block position) to get pixel data from
1219 * @param x_off horizontal position of block from origin (0, 0)
1220 * @param y_off vertical position of block from origin (0, 0)
1221 * @param block_w width of block (16, 8 or 4)
1222 * @param block_h height of block (always same as block_w)
1223 * @param width width of src/dst plane data
1224 * @param height height of src/dst plane data
1225 * @param linesize size of a single line of plane data, including padding
1226 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1228 static av_always_inline
1229 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
1230 AVFrame *ref, const VP56mv *mv, int x_off, int y_off,
1231 int block_w, int block_h, int width, int height, int linesize,
1232 vp8_mc_func mc_func[3][3])
1234 uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
1237 int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1238 int my = mv->y&7, my_idx = subpel_idx[0][my];
1240 x_off += mv->x >> 3;
1241 y_off += mv->y >> 3;
1244 src1 += y_off * linesize + x_off;
1245 src2 += y_off * linesize + x_off;
1246 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1247 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1248 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1249 s->dsp.emulated_edge_mc(td->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
1250 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1251 x_off - mx_idx, y_off - my_idx, width, height);
1252 src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1253 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1255 s->dsp.emulated_edge_mc(td->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
1256 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1257 x_off - mx_idx, y_off - my_idx, width, height);
1258 src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1259 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1261 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1262 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1265 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1266 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1267 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1271 static av_always_inline
1272 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1273 AVFrame *ref_frame, int x_off, int y_off,
1274 int bx_off, int by_off,
1275 int block_w, int block_h,
1276 int width, int height, VP56mv *mv)
1281 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1282 ref_frame, mv, x_off + bx_off, y_off + by_off,
1283 block_w, block_h, width, height, s->linesize,
1284 s->put_pixels_tab[block_w == 8]);
1287 if (s->profile == 3) {
1291 x_off >>= 1; y_off >>= 1;
1292 bx_off >>= 1; by_off >>= 1;
1293 width >>= 1; height >>= 1;
1294 block_w >>= 1; block_h >>= 1;
1295 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1296 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1297 &uvmv, x_off + bx_off, y_off + by_off,
1298 block_w, block_h, width, height, s->uvlinesize,
1299 s->put_pixels_tab[1 + (block_w == 4)]);
1302 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1303 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1304 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1306 /* Don't prefetch refs that haven't been used very often this frame. */
1307 if (s->ref_count[ref-1] > (mb_xy >> 5)) {
1308 int x_off = mb_x << 4, y_off = mb_y << 4;
1309 int mx = (mb->mv.x>>2) + x_off + 8;
1310 int my = (mb->mv.y>>2) + y_off;
1311 uint8_t **src= s->framep[ref]->data;
1312 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1313 /* For threading, a ff_thread_await_progress here might be useful, but
1314 * it actually slows down the decoder. Since a bad prefetch doesn't
1315 * generate bad decoder output, we don't run it here. */
1316 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1317 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1318 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1323 * Apply motion vectors to prediction buffer, chapter 18.
1325 static av_always_inline
1326 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1327 VP8Macroblock *mb, int mb_x, int mb_y)
1329 int x_off = mb_x << 4, y_off = mb_y << 4;
1330 int width = 16*s->mb_width, height = 16*s->mb_height;
1331 AVFrame *ref = s->framep[mb->ref_frame];
1332 VP56mv *bmv = mb->bmv;
1334 switch (mb->partitioning) {
1335 case VP8_SPLITMVMODE_NONE:
1336 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1337 0, 0, 16, 16, width, height, &mb->mv);
1339 case VP8_SPLITMVMODE_4x4: {
1344 for (y = 0; y < 4; y++) {
1345 for (x = 0; x < 4; x++) {
1346 vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
1348 4*x + x_off, 4*y + y_off, 4, 4,
1349 width, height, s->linesize,
1350 s->put_pixels_tab[2]);
1355 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1356 for (y = 0; y < 2; y++) {
1357 for (x = 0; x < 2; x++) {
1358 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1359 mb->bmv[ 2*y * 4 + 2*x+1].x +
1360 mb->bmv[(2*y+1) * 4 + 2*x ].x +
1361 mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1362 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1363 mb->bmv[ 2*y * 4 + 2*x+1].y +
1364 mb->bmv[(2*y+1) * 4 + 2*x ].y +
1365 mb->bmv[(2*y+1) * 4 + 2*x+1].y;
1366 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1367 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
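                /* The chroma MV is the rounded average of the four luma MVs of
                 * this 8x8 area; e.g. x components {-3, -2, -2, -1} sum to -8,
                 * giving (-8 + 2 - 1) >> 2 = -2. The sign term keeps the
                 * rounding symmetric for negative sums. */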
1368 if (s->profile == 3) {
1372 vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
1373 dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1374 4*x + x_off, 4*y + y_off, 4, 4,
1375 width, height, s->uvlinesize,
1376 s->put_pixels_tab[2]);
1381 case VP8_SPLITMVMODE_16x8:
1382 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1383 0, 0, 16, 8, width, height, &bmv[0]);
1384 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1385 0, 8, 16, 8, width, height, &bmv[1]);
1387 case VP8_SPLITMVMODE_8x16:
1388 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1389 0, 0, 8, 16, width, height, &bmv[0]);
1390 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1391 8, 0, 8, 16, width, height, &bmv[1]);
1393 case VP8_SPLITMVMODE_8x8:
1394 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1395 0, 0, 8, 8, width, height, &bmv[0]);
1396 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1397 8, 0, 8, 8, width, height, &bmv[1]);
1398 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1399 0, 8, 8, 8, width, height, &bmv[2]);
1400 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1401 8, 8, 8, 8, width, height, &bmv[3]);
1406 static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
1407 uint8_t *dst[3], VP8Macroblock *mb)
1411 if (mb->mode != MODE_I4x4) {
1412 uint8_t *y_dst = dst[0];
1413 for (y = 0; y < 4; y++) {
1414 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
1416 if (nnz4&~0x01010101) {
1417 for (x = 0; x < 4; x++) {
1418 if ((uint8_t)nnz4 == 1)
1419 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
1420 else if((uint8_t)nnz4 > 1)
1421 s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
1427 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1430 y_dst += 4*s->linesize;
1434 for (ch = 0; ch < 2; ch++) {
1435 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
1437 uint8_t *ch_dst = dst[1+ch];
1438 if (nnz4&~0x01010101) {
1439 for (y = 0; y < 2; y++) {
1440 for (x = 0; x < 2; x++) {
1441 if ((uint8_t)nnz4 == 1)
1442 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1443 else if((uint8_t)nnz4 > 1)
1444 s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1447 goto chroma_idct_end;
1449 ch_dst += 4*s->uvlinesize;
1452 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
1459 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
1461 int interior_limit, filter_level;
1463 if (s->segmentation.enabled) {
1464 filter_level = s->segmentation.filter_level[mb->segment];
1465 if (!s->segmentation.absolute_vals)
1466 filter_level += s->filter.level;
1468 filter_level = s->filter.level;
1470 if (s->lf_delta.enabled) {
1471 filter_level += s->lf_delta.ref[mb->ref_frame];
1472 filter_level += s->lf_delta.mode[mb->mode];
1475 filter_level = av_clip_uintp2(filter_level, 6);
1477 interior_limit = filter_level;
1478 if (s->filter.sharpness) {
1479 interior_limit >>= (s->filter.sharpness + 3) >> 2;
1480 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1482 interior_limit = FFMAX(interior_limit, 1);
1484 f->filter_level = filter_level;
1485 f->inner_limit = interior_limit;
1486 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
1489 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
1491 int mbedge_lim, bedge_lim, hev_thresh;
1492 int filter_level = f->filter_level;
1493 int inner_limit = f->inner_limit;
1494 int inner_filter = f->inner_filter;
1495 int linesize = s->linesize;
1496 int uvlinesize = s->uvlinesize;
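    /* The high-edge-variance threshold depends only on the filter level and on
     * whether this is a keyframe, so the spec's mapping is precomputed in a
     * table indexed as [keyframe][filter_level]. */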
1497 static const uint8_t hev_thresh_lut[2][64] = {
1498 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1499 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1500 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1502 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1503 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1504 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1511 bedge_lim = 2*filter_level + inner_limit;
1512 mbedge_lim = bedge_lim + 4;
1514 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
1517 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
1518 mbedge_lim, inner_limit, hev_thresh);
1519 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
1520 mbedge_lim, inner_limit, hev_thresh);
1524 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
1525 inner_limit, hev_thresh);
1526 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
1527 inner_limit, hev_thresh);
1528 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
1529 inner_limit, hev_thresh);
1530 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
1531 uvlinesize, bedge_lim,
1532 inner_limit, hev_thresh);
1536 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
1537 mbedge_lim, inner_limit, hev_thresh);
1538 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
1539 mbedge_lim, inner_limit, hev_thresh);
1543 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
1544 linesize, bedge_lim,
1545 inner_limit, hev_thresh);
1546 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
1547 linesize, bedge_lim,
1548 inner_limit, hev_thresh);
1549 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
1550 linesize, bedge_lim,
1551 inner_limit, hev_thresh);
1552 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
1553 dst[2] + 4 * uvlinesize,
1554 uvlinesize, bedge_lim,
1555 inner_limit, hev_thresh);
1559 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
1561 int mbedge_lim, bedge_lim;
1562 int filter_level = f->filter_level;
1563 int inner_limit = f->inner_limit;
1564 int inner_filter = f->inner_filter;
1565 int linesize = s->linesize;
1570 bedge_lim = 2*filter_level + inner_limit;
1571 mbedge_lim = bedge_lim + 4;
1574 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
1576 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1577 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1578 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
1582 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
1584 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1585 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1586 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
1590 static void release_queued_segmaps(VP8Context *s, int is_close)
1592 int leave_behind = is_close ? 0 : !s->maps_are_invalid;
1593 while (s->num_maps_to_be_freed > leave_behind)
1594 av_freep(&s->segmentation_maps[--s->num_maps_to_be_freed]);
1595 s->maps_are_invalid = 0;
1598 #define MARGIN (16 << 2)
1599 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, AVFrame *curframe,
1600 AVFrame *prev_frame)
1602 VP8Context *s = avctx->priv_data;
1605 s->mv_min.y = -MARGIN;
1606 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1607 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
1608 VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1609 int mb_xy = mb_y*s->mb_width;
1611 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1613 s->mv_min.x = -MARGIN;
1614 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1615 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1617 AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
1618 decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
1619 prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 1);
1629 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
1631 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);\
1632 if (otd->thread_mb_pos < tmp) {\
1633 pthread_mutex_lock(&otd->lock);\
1634 td->wait_mb_pos = tmp;\
1636 if (otd->thread_mb_pos >= tmp)\
1638 pthread_cond_wait(&otd->cond, &otd->lock);\
1640 td->wait_mb_pos = INT_MAX;\
1641 pthread_mutex_unlock(&otd->lock);\
1645 #define update_pos(td, mb_y, mb_x)\
1647 int pos = (mb_y << 16) | (mb_x & 0xFFFF);\
1648 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
1649 int is_null = (next_td == NULL) || (prev_td == NULL);\
1650 int pos_check = (is_null) ? 1 :\
1651 (next_td != td && pos >= next_td->wait_mb_pos) ||\
1652 (prev_td != td && pos >= prev_td->wait_mb_pos);\
1653 td->thread_mb_pos = pos;\
1654 if (sliced_threading && pos_check) {\
1655 pthread_mutex_lock(&td->lock);\
1656 pthread_cond_broadcast(&td->cond);\
1657 pthread_mutex_unlock(&td->lock);\
1661 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
1662 #define update_pos(td, mb_y, mb_x)
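/* Sliced-threading synchronisation: a thread publishes its progress as
 * (mb_y << 16) | mb_x in thread_mb_pos. check_thread_pos() waits on the other
 * thread's condition variable until that thread has passed the given position,
 * and update_pos() stores the new position and wakes any waiter. Both expand
 * to nothing in builds without thread support. */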
1665 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
1666 int jobnr, int threadnr)
1668 VP8Context *s = avctx->priv_data;
1669 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
1670 int mb_y = td->thread_mb_pos>>16;
1671 int i, y, mb_x, mb_xy = mb_y*s->mb_width;
1672 int num_jobs = s->num_jobs;
1673 AVFrame *curframe = s->curframe, *prev_frame = s->prev_frame;
1674 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
1677 curframe->data[0] + 16*mb_y*s->linesize,
1678 curframe->data[1] + 8*mb_y*s->uvlinesize,
1679 curframe->data[2] + 8*mb_y*s->uvlinesize
1681 if (mb_y == 0) prev_td = td;
1682 else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1683 if (mb_y == s->mb_height-1) next_td = td;
1684 else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
1685 if (s->mb_layout == 1)
1686 mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1688 mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1689 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
1690 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1693 memset(td->left_nnz, 0, sizeof(td->left_nnz));
1694 // left edge of 129 for intra prediction
1695 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1696 for (i = 0; i < 3; i++)
1697 for (y = 0; y < 16>>!!i; y++)
1698 dst[i][y*curframe->linesize[i]-1] = 129;
1700 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
1704 s->mv_min.x = -MARGIN;
1705 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1707 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1708 // Wait for previous thread to read mb_x+2, and reach mb_y-1.
1709 if (prev_td != td) {
1710 if (threadnr != 0) {
1711 check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
1713 check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
1717 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1718 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1721 decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
1722 prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 0);
1724 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
1727 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);
1729 if (mb->mode <= MODE_I4x4)
1730 intra_predict(s, td, dst, mb, mb_x, mb_y);
1732 inter_predict(s, td, dst, mb, mb_x, mb_y);
1734 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
1737 idct_mb(s, td, dst, mb);
1739 AV_ZERO64(td->left_nnz);
1740 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
1742 // Reset DC block predictors if they would exist if the mb had coefficients
1743 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
1744 td->left_nnz[8] = 0;
1745 s->top_nnz[mb_x][8] = 0;
1749 if (s->deblock_filter)
1750 filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);
1752 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
1753 if (s->filter.simple)
1754 backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1756 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1759 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
1767 if (mb_x == s->mb_width+1) {
1768 update_pos(td, mb_y, s->mb_width+3);
1770 update_pos(td, mb_y, mb_x);
1775 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
1776 int jobnr, int threadnr)
1778 VP8Context *s = avctx->priv_data;
1779 VP8ThreadData *td = &s->thread_data[threadnr];
1780 int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
1781 AVFrame *curframe = s->curframe;
1783 VP8ThreadData *prev_td, *next_td;
1785 curframe->data[0] + 16*mb_y*s->linesize,
1786 curframe->data[1] + 8*mb_y*s->uvlinesize,
1787 curframe->data[2] + 8*mb_y*s->uvlinesize
1790 if (s->mb_layout == 1)
1791 mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1793 mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1795 if (mb_y == 0) prev_td = td;
1796 else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1797 if (mb_y == s->mb_height-1) next_td = td;
1798 else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
1800 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
1801 VP8FilterStrength *f = &td->filter_strength[mb_x];
1802 if (prev_td != td) {
1803 check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
1806 if (next_td != &s->thread_data[0]) {
1807 check_thread_pos(td, next_td, mb_x+1, mb_y+1);
1810 if (num_jobs == 1) {
1811 if (s->filter.simple)
1812 backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1814 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1817 if (s->filter.simple)
1818 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
1820 filter_mb(s, dst, f, mb_x, mb_y);
1825 update_pos(td, mb_y, (s->mb_width+3) + mb_x);
1829 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
1830 int jobnr, int threadnr)
1832 VP8Context *s = avctx->priv_data;
1833 VP8ThreadData *td = &s->thread_data[jobnr];
1834 VP8ThreadData *next_td = NULL, *prev_td = NULL;
1835 AVFrame *curframe = s->curframe;
1836 int mb_y, num_jobs = s->num_jobs;
1837 td->thread_nr = threadnr;
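    /* Each job handles every num_jobs-th macroblock row, starting at its own
     * job number, so neighbouring rows run on different threads and the
     * check_thread_pos()/update_pos() handshake enforces the top/left
     * decoding dependencies between them. */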
1838 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
1839 if (mb_y >= s->mb_height) break;
1840 td->thread_mb_pos = mb_y<<16;
1841 vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
1842 if (s->deblock_filter)
1843 vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
1844 update_pos(td, mb_y, INT_MAX & 0xFFFF);
1849 if (avctx->active_thread_type == FF_THREAD_FRAME)
1850 ff_thread_report_progress(curframe, mb_y, 0);
1856 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
1859 VP8Context *s = avctx->priv_data;
1860 int ret, i, referenced, num_jobs;
1861 enum AVDiscard skip_thresh;
1862 AVFrame *av_uninit(curframe), *prev_frame;
1864 release_queued_segmaps(s, 0);
1866 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
1869 prev_frame = s->framep[VP56_FRAME_CURRENT];
1871 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
1872 || s->update_altref == VP56_FRAME_CURRENT;
1874 skip_thresh = !referenced ? AVDISCARD_NONREF :
1875 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
1877 if (avctx->skip_frame >= skip_thresh) {
1879 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
1882 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
1884 // release no longer referenced frames
1885 for (i = 0; i < 5; i++)
1886 if (s->frames[i].data[0] &&
1887 &s->frames[i] != prev_frame &&
1888 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1889 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1890 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1891 vp8_release_frame(s, &s->frames[i], 1, 0);
1893 // find a free buffer
1894 for (i = 0; i < 5; i++)
1895 if (&s->frames[i] != prev_frame &&
1896 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1897 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1898 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1899 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
1903 av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
1906 if (curframe->data[0])
1907 vp8_release_frame(s, curframe, 1, 0);
1909 // Given that arithmetic probabilities are updated every frame, it's quite likely
1910 // that the values we have on a random interframe are complete junk if we didn't
1911 // start decode on a keyframe. So just don't display anything rather than junk.
1912 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1913 !s->framep[VP56_FRAME_GOLDEN] ||
1914 !s->framep[VP56_FRAME_GOLDEN2])) {
1915 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1916 ret = AVERROR_INVALIDDATA;
1920 curframe->key_frame = s->keyframe;
1921 curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1922 curframe->reference = referenced ? 3 : 0;
1923 if ((ret = vp8_alloc_frame(s, curframe))) {
1924 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
1928 // check if golden and altref are swapped
1929 if (s->update_altref != VP56_FRAME_NONE) {
1930 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
1932 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
1934 if (s->update_golden != VP56_FRAME_NONE) {
1935 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
1937 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
1939 if (s->update_last) {
1940 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
1942 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
1944 s->next_framep[VP56_FRAME_CURRENT] = curframe;
1946 ff_thread_finish_setup(avctx);
1948 s->linesize = curframe->linesize[0];
1949 s->uvlinesize = curframe->linesize[1];
1951 if (!s->thread_data[0].edge_emu_buffer)
1952 for (i = 0; i < MAX_THREADS; i++)
1953 s->thread_data[i].edge_emu_buffer = av_malloc(21*s->linesize);
1955 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1956 /* Zero macroblock structures for top/top-left prediction from outside the frame. */
1958 memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
1959 if (!s->mb_layout && s->keyframe)
1960 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
1962 // top edge of 127 for intra prediction
1963 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1964 s->top_border[0][15] = s->top_border[0][23] = 127;
1965 memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
1967 memset(s->ref_count, 0, sizeof(s->ref_count));
1970 // Make sure the previous frame has read its segmentation map,
1971 // if we re-use the same map.
1972 if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
1973 ff_thread_await_progress(prev_frame, 1, 0);
1975 if (s->mb_layout == 1)
1976 vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
1978 if (avctx->active_thread_type == FF_THREAD_FRAME)
1981 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
1982 s->num_jobs = num_jobs;
1983 s->curframe = curframe;
1984 s->prev_frame = prev_frame;
1985 s->mv_min.y = -MARGIN;
1986 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1987 for (i = 0; i < MAX_THREADS; i++) {
1988 s->thread_data[i].thread_mb_pos = 0;
1989 s->thread_data[i].wait_mb_pos = INT_MAX;
1991 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);
1993 ff_thread_report_progress(curframe, INT_MAX, 0);
1994 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
1997 // if future frames don't use the updated probabilities,
1998 // reset them to the values we saved
1999 if (!s->update_probabilities)
2000 s->prob[0] = s->prob[1];
2002 if (!s->invisible) {
2003 *(AVFrame*)data = *curframe;
2004 *data_size = sizeof(AVFrame);
2009 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2013 static av_cold int vp8_decode_init(AVCodecContext *avctx)
2015 VP8Context *s = avctx->priv_data;
2018 avctx->pix_fmt = PIX_FMT_YUV420P;
2020 ff_dsputil_init(&s->dsp, avctx);
2021 ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8, 1);
2022 ff_vp8dsp_init(&s->vp8dsp);
2027 static av_cold int vp8_decode_free(AVCodecContext *avctx)
2029 vp8_decode_flush_impl(avctx, 0, 1, 1);
2030 release_queued_segmaps(avctx->priv_data, 1);
2034 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2036 VP8Context *s = avctx->priv_data;
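/* Translate AVFrame pointers that point into the source thread context's
 * frames[] array to the corresponding entries in this context's frames[]
 * array (keeping NULL as NULL). */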
2043 #define REBASE(pic) \
2044 pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
2046 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
2048 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2050 if (s->macroblocks_base &&
2051 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2053 s->maps_are_invalid = 1;
2054 s->mb_width = s_src->mb_width;
2055 s->mb_height = s_src->mb_height;
2058 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2059 s->segmentation = s_src->segmentation;
2060 s->lf_delta = s_src->lf_delta;
2061 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2063 memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
2064 s->framep[0] = REBASE(s_src->next_framep[0]);
2065 s->framep[1] = REBASE(s_src->next_framep[1]);
2066 s->framep[2] = REBASE(s_src->next_framep[2]);
2067 s->framep[3] = REBASE(s_src->next_framep[3]);
2072 AVCodec ff_vp8_decoder = {
2074 .type = AVMEDIA_TYPE_VIDEO,
2076 .priv_data_size = sizeof(VP8Context),
2077 .init = vp8_decode_init,
2078 .close = vp8_decode_free,
2079 .decode = vp8_decode_frame,
2080 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2081 .flush = vp8_decode_flush,
2082 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2083 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2084 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),