/*
 * VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Jason Garrett-Glaser
 * Copyright (C) 2012 Daniel Kang
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
26 #include "libavutil/imgutils.h"
31 #include "rectangle.h"
38 static void free_buffers(VP8Context *s)
42 for (i = 0; i < MAX_THREADS; i++) {
43 av_freep(&s->thread_data[i].filter_strength);
44 av_freep(&s->thread_data[i].edge_emu_buffer);
46 av_freep(&s->thread_data);
47 av_freep(&s->macroblocks_base);
48 av_freep(&s->intra4x4_pred_mode_top);
49 av_freep(&s->top_nnz);
50 av_freep(&s->top_border);
52 s->macroblocks = NULL;
55 static int vp8_alloc_frame(VP8Context *s, AVFrame *f)
58 if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0)
60 if (s->num_maps_to_be_freed && !s->maps_are_invalid) {
61 f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed];
62 } else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) {
63 ff_thread_release_buffer(s->avctx, f);
64 return AVERROR(ENOMEM);
69 static void vp8_release_frame(VP8Context *s, AVFrame *f, int prefer_delayed_free, int can_direct_free)
71 if (f->ref_index[0]) {
72 if (prefer_delayed_free) {
73 /* Upon a size change, we want to free the maps but other threads may still
74 * be using them, so queue them. Upon a seek, all threads are inactive so
75 * we want to cache one to prevent re-allocation in the next decoding
76 * iteration, but the rest we can free directly. */
77 int max_queued_maps = can_direct_free ? 1 : FF_ARRAY_ELEMS(s->segmentation_maps);
78 if (s->num_maps_to_be_freed < max_queued_maps) {
79 s->segmentation_maps[s->num_maps_to_be_freed++] = f->ref_index[0];
80 } else if (can_direct_free) /* vp8_decode_flush(), but our queue is full */ {
81 av_free(f->ref_index[0]);
82 } /* else: MEMLEAK (should never happen, but better that than crash) */
83 f->ref_index[0] = NULL;
84 } else /* vp8_decode_free() */ {
85 av_free(f->ref_index[0]);
88 ff_thread_release_buffer(s->avctx, f);
91 static void vp8_decode_flush_impl(AVCodecContext *avctx,
92 int prefer_delayed_free, int can_direct_free, int free_mem)
94 VP8Context *s = avctx->priv_data;
97 if (!avctx->internal->is_copy) {
98 for (i = 0; i < 5; i++)
99 if (s->frames[i].data[0])
100 vp8_release_frame(s, &s->frames[i], prefer_delayed_free, can_direct_free);
102 memset(s->framep, 0, sizeof(s->framep));
106 s->maps_are_invalid = 1;
110 static void vp8_decode_flush(AVCodecContext *avctx)
112 vp8_decode_flush_impl(avctx, 1, 1, 0);
115 static int update_dimensions(VP8Context *s, int width, int height)
117 AVCodecContext *avctx = s->avctx;
120 if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
121 height != s->avctx->height) {
122 if (av_image_check_size(width, height, 0, s->avctx))
123 return AVERROR_INVALIDDATA;
125 vp8_decode_flush_impl(s->avctx, 1, 0, 1);
127 avcodec_set_dimensions(s->avctx, width, height);
130 s->mb_width = (s->avctx->coded_width +15) / 16;
131 s->mb_height = (s->avctx->coded_height+15) / 16;
133 s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
134 if (!s->mb_layout) { // Frame threading and one thread
135 s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
136 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
138 else // Sliced threading
139 s->macroblocks_base = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
140 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
141 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
142 s->thread_data = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));
144 for (i = 0; i < MAX_THREADS; i++) {
145 s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
147 pthread_mutex_init(&s->thread_data[i].lock, NULL);
148 pthread_cond_init(&s->thread_data[i].cond, NULL);
152 if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
153 (!s->intra4x4_pred_mode_top && !s->mb_layout))
154 return AVERROR(ENOMEM);
156 s->macroblocks = s->macroblocks_base + 1;
161 static void parse_segment_info(VP8Context *s)
163 VP56RangeCoder *c = &s->c;
166 s->segmentation.update_map = vp8_rac_get(c);
168 if (vp8_rac_get(c)) { // update segment feature data
169 s->segmentation.absolute_vals = vp8_rac_get(c);
171 for (i = 0; i < 4; i++)
172 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
174 for (i = 0; i < 4; i++)
175 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
177 if (s->segmentation.update_map)
178 for (i = 0; i < 3; i++)
179 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
182 static void update_lf_deltas(VP8Context *s)
184 VP56RangeCoder *c = &s->c;
187 for (i = 0; i < 4; i++) {
188 if (vp8_rac_get(c)) {
189 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
192 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
196 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
197 if (vp8_rac_get(c)) {
198 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
201 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
206 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
208 const uint8_t *sizes = buf;
211 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
213 buf += 3*(s->num_coeff_partitions-1);
214 buf_size -= 3*(s->num_coeff_partitions-1);
218 for (i = 0; i < s->num_coeff_partitions-1; i++) {
219 int size = AV_RL24(sizes + 3*i);
220 if (buf_size - size < 0)
223 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
227 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
232 static void get_quants(VP8Context *s)
234 VP56RangeCoder *c = &s->c;
237 int yac_qi = vp8_rac_get_uint(c, 7);
238 int ydc_delta = vp8_rac_get_sint(c, 4);
239 int y2dc_delta = vp8_rac_get_sint(c, 4);
240 int y2ac_delta = vp8_rac_get_sint(c, 4);
241 int uvdc_delta = vp8_rac_get_sint(c, 4);
242 int uvac_delta = vp8_rac_get_sint(c, 4);
244 for (i = 0; i < 4; i++) {
245 if (s->segmentation.enabled) {
246 base_qi = s->segmentation.base_quant[i];
247 if (!s->segmentation.absolute_vals)
252 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
253 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)];
254 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
255 s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100;
256 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
257 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
259 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
260 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
265 * Determine which buffers golden and altref should be updated with after this frame.
266 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
268 * Intra frames update all 3 references
269 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
270 * If the update (golden|altref) flag is set, it's updated with the current frame
271 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
272 * If the flag is not set, the number read means:
274 * 1: VP56_FRAME_PREVIOUS
275 * 2: update golden with altref, or update altref with golden
277 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
279 VP56RangeCoder *c = &s->c;
282 return VP56_FRAME_CURRENT;
284 switch (vp8_rac_get_uint(c, 2)) {
286 return VP56_FRAME_PREVIOUS;
288 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
290 return VP56_FRAME_NONE;
293 static void update_refs(VP8Context *s)
295 VP56RangeCoder *c = &s->c;
297 int update_golden = vp8_rac_get(c);
298 int update_altref = vp8_rac_get(c);
300 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
301 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
304 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
306 VP56RangeCoder *c = &s->c;
307 int header_size, hscale, vscale, i, j, k, l, m, ret;
308 int width = s->avctx->width;
309 int height = s->avctx->height;
311 s->keyframe = !(buf[0] & 1);
312 s->profile = (buf[0]>>1) & 7;
313 s->invisible = !(buf[0] & 0x10);
314 header_size = AV_RL24(buf) >> 5;
319 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
322 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
323 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
324 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
326 if (header_size > buf_size - 7*s->keyframe) {
327 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
328 return AVERROR_INVALIDDATA;
332 if (AV_RL24(buf) != 0x2a019d) {
333 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
334 return AVERROR_INVALIDDATA;
336 width = AV_RL16(buf+3) & 0x3fff;
337 height = AV_RL16(buf+5) & 0x3fff;
338 hscale = buf[4] >> 6;
339 vscale = buf[6] >> 6;
343 if (hscale || vscale)
344 av_log_missing_feature(s->avctx, "Upscaling", 1);
346 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
347 for (i = 0; i < 4; i++)
348 for (j = 0; j < 16; j++)
349 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
350 sizeof(s->prob->token[i][j]));
351 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
352 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
353 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
354 memset(&s->segmentation, 0, sizeof(s->segmentation));
357 ff_vp56_init_range_decoder(c, buf, header_size);
359 buf_size -= header_size;
363 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
364 vp8_rac_get(c); // whether we can skip clamping in dsp functions
367 if ((s->segmentation.enabled = vp8_rac_get(c)))
368 parse_segment_info(s);
370 s->segmentation.update_map = 0; // FIXME: move this to some init function?
372 s->filter.simple = vp8_rac_get(c);
373 s->filter.level = vp8_rac_get_uint(c, 6);
374 s->filter.sharpness = vp8_rac_get_uint(c, 3);
376 if ((s->lf_delta.enabled = vp8_rac_get(c)))
380 if (setup_partitions(s, buf, buf_size)) {
381 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
382 return AVERROR_INVALIDDATA;
385 if (!s->macroblocks_base || /* first frame */
386 width != s->avctx->width || height != s->avctx->height || (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) {
387 if ((ret = update_dimensions(s, width, height)) < 0)
395 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
396 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
399 // if we aren't saving this frame's probabilities for future frames,
400 // make a copy of the current probabilities
401 if (!(s->update_probabilities = vp8_rac_get(c)))
402 s->prob[1] = s->prob[0];
404 s->update_last = s->keyframe || vp8_rac_get(c);
406 for (i = 0; i < 4; i++)
407 for (j = 0; j < 8; j++)
408 for (k = 0; k < 3; k++)
409 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
410 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
411 int prob = vp8_rac_get_uint(c, 8);
412 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
413 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
416 if ((s->mbskip_enabled = vp8_rac_get(c)))
417 s->prob->mbskip = vp8_rac_get_uint(c, 8);
420 s->prob->intra = vp8_rac_get_uint(c, 8);
421 s->prob->last = vp8_rac_get_uint(c, 8);
422 s->prob->golden = vp8_rac_get_uint(c, 8);
425 for (i = 0; i < 4; i++)
426 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
428 for (i = 0; i < 3; i++)
429 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
431 // 17.2 MV probability update
432 for (i = 0; i < 2; i++)
433 for (j = 0; j < 19; j++)
434 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
435 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
441 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
443 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
444 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
448 * Motion vector coding, 17.1.
450 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
454 if (vp56_rac_get_prob_branchy(c, p[0])) {
457 for (i = 0; i < 3; i++)
458 x += vp56_rac_get_prob(c, p[9 + i]) << i;
459 for (i = 9; i > 3; i--)
460 x += vp56_rac_get_prob(c, p[9 + i]) << i;
461 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
465 const uint8_t *ps = p+2;
466 bit = vp56_rac_get_prob(c, *ps);
469 bit = vp56_rac_get_prob(c, *ps);
472 x += vp56_rac_get_prob(c, *ps);
475 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
478 static av_always_inline
479 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
482 return vp8_submv_prob[4-!!left];
484 return vp8_submv_prob[2];
485 return vp8_submv_prob[1-!!left];
489 * Split motion vector prediction, 16.4.
490 * @returns the number of motion vectors parsed (2, 4 or 16)
492 static av_always_inline
493 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
497 VP8Macroblock *top_mb;
498 VP8Macroblock *left_mb = &mb[-1];
499 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
501 *mbsplits_cur, *firstidx;
503 VP56mv *left_mv = left_mb->bmv;
504 VP56mv *cur_mv = mb->bmv;
506 if (!layout) // layout is inlined, s->mb_layout is not
509 top_mb = &mb[-s->mb_width-1];
510 mbsplits_top = vp8_mbsplits[top_mb->partitioning];
511 top_mv = top_mb->bmv;
513 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
514 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
515 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
517 part_idx = VP8_SPLITMVMODE_8x8;
520 part_idx = VP8_SPLITMVMODE_4x4;
523 num = vp8_mbsplit_count[part_idx];
524 mbsplits_cur = vp8_mbsplits[part_idx],
525 firstidx = vp8_mbfirstidx[part_idx];
526 mb->partitioning = part_idx;
528 for (n = 0; n < num; n++) {
530 uint32_t left, above;
531 const uint8_t *submv_prob;
534 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
536 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
538 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
540 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
542 submv_prob = get_submv_prob(left, above);
544 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
545 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
546 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
547 mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
548 mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
550 AV_ZERO32(&mb->bmv[n]);
553 AV_WN32A(&mb->bmv[n], above);
556 AV_WN32A(&mb->bmv[n], left);
563 static av_always_inline
564 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
566 VP8Macroblock *mb_edge[3] = { 0 /* top */,
569 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
570 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
572 int cur_sign_bias = s->sign_bias[mb->ref_frame];
573 int8_t *sign_bias = s->sign_bias;
575 uint8_t cnt[4] = { 0 };
576 VP56RangeCoder *c = &s->c;
578 if (!layout) { // layout is inlined (s->mb_layout is not)
583 mb_edge[0] = mb - s->mb_width-1;
584 mb_edge[2] = mb - s->mb_width-2;
587 AV_ZERO32(&near_mv[0]);
588 AV_ZERO32(&near_mv[1]);
589 AV_ZERO32(&near_mv[2]);
591 /* Process MB on top, left and top-left */
592 #define MV_EDGE_CHECK(n)\
594 VP8Macroblock *edge = mb_edge[n];\
595 int edge_ref = edge->ref_frame;\
596 if (edge_ref != VP56_FRAME_CURRENT) {\
597 uint32_t mv = AV_RN32A(&edge->mv);\
599 if (cur_sign_bias != sign_bias[edge_ref]) {\
600 /* SWAR negate of the values in mv. */\
602 mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
604 if (!n || mv != AV_RN32A(&near_mv[idx]))\
605 AV_WN32A(&near_mv[++idx], mv);\
606 cnt[idx] += 1 + (n != 2);\
608 cnt[CNT_ZERO] += 1 + (n != 2);\
616 mb->partitioning = VP8_SPLITMVMODE_NONE;
617 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
618 mb->mode = VP8_MVMODE_MV;
620 /* If we have three distinct MVs, merge first and last if they're the same */
621 if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
622 cnt[CNT_NEAREST] += 1;
624 /* Swap near and nearest if necessary */
625 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
626 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
627 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
630 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
631 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
633 /* Choose the best mv out of 0,0 and the nearest mv */
634 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
635 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
636 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
637 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
639 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
640 mb->mode = VP8_MVMODE_SPLIT;
641 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
643 mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
644 mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
648 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
652 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
656 mb->mode = VP8_MVMODE_ZERO;
662 static av_always_inline
663 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
664 int mb_x, int keyframe, int layout)
666 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
669 VP8Macroblock *mb_top = mb - s->mb_width - 1;
670 memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
675 uint8_t* const left = s->intra4x4_pred_mode_left;
677 top = mb->intra4x4_pred_mode_top;
679 top = s->intra4x4_pred_mode_top + 4 * mb_x;
680 for (y = 0; y < 4; y++) {
681 for (x = 0; x < 4; x++) {
683 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
684 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
685 left[y] = top[x] = *intra4x4;
691 for (i = 0; i < 16; i++)
692 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
696 static av_always_inline
697 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
698 uint8_t *segment, uint8_t *ref, int layout)
700 VP56RangeCoder *c = &s->c;
702 if (s->segmentation.update_map) {
703 int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
704 *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
705 } else if (s->segmentation.enabled)
706 *segment = ref ? *ref : *segment;
707 mb->segment = *segment;
709 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
712 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
714 if (mb->mode == MODE_I4x4) {
715 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
717 const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
718 if (s->mb_layout == 1)
719 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
721 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
722 AV_WN32A( s->intra4x4_pred_mode_left, modes);
725 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
726 mb->ref_frame = VP56_FRAME_CURRENT;
727 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
729 if (vp56_rac_get_prob_branchy(c, s->prob->last))
730 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
731 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
733 mb->ref_frame = VP56_FRAME_PREVIOUS;
734 s->ref_count[mb->ref_frame-1]++;
736 // motion vectors, 16.3
737 decode_mvs(s, mb, mb_x, mb_y, layout);
740 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
742 if (mb->mode == MODE_I4x4)
743 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
745 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
746 mb->ref_frame = VP56_FRAME_CURRENT;
747 mb->partitioning = VP8_SPLITMVMODE_NONE;
748 AV_ZERO32(&mb->bmv[0]);
752 #ifndef decode_block_coeffs_internal
754 * @param c arithmetic bitstream reader context
755 * @param block destination for block coefficients
756 * @param probs probabilities to use when reading trees from the bitstream
757 * @param i initial coeff index, 0 unless a separate DC block is coded
758 * @param qmul array holding the dc/ac dequant factor at position 0/1
759 * @return 0 if no coeffs were decoded
760 * otherwise, the index of the last coeff decoded plus one
762 static int decode_block_coeffs_internal(VP56RangeCoder *r, DCTELEM block[16],
763 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
764 int i, uint8_t *token_prob, int16_t qmul[2])
766 VP56RangeCoder c = *r;
770 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
774 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
776 break; // invalid input; blocks should end with EOB
777 token_prob = probs[i][0];
781 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
783 token_prob = probs[i+1][1];
785 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
786 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
788 coeff += vp56_rac_get_prob(&c, token_prob[5]);
792 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
793 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
794 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
797 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
798 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
800 } else { // DCT_CAT3 and up
801 int a = vp56_rac_get_prob(&c, token_prob[8]);
802 int b = vp56_rac_get_prob(&c, token_prob[9+a]);
803 int cat = (a<<1) + b;
804 coeff = 3 + (8<<cat);
805 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
808 token_prob = probs[i+1][2];
810 block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
819 * @param c arithmetic bitstream reader context
820 * @param block destination for block coefficients
821 * @param probs probabilities to use when reading trees from the bitstream
822 * @param i initial coeff index, 0 unless a separate DC block is coded
823 * @param zero_nhood the initial prediction context for number of surrounding
824 * all-zero blocks (only left/top, so 0-2)
825 * @param qmul array holding the dc/ac dequant factor at position 0/1
826 * @return 0 if no coeffs were decoded
827 * otherwise, the index of the last coeff decoded plus one
829 static av_always_inline
830 int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
831 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
832 int i, int zero_nhood, int16_t qmul[2])
834 uint8_t *token_prob = probs[i][zero_nhood];
835 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
837 return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
840 static av_always_inline
841 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
842 uint8_t t_nnz[9], uint8_t l_nnz[9])
844 int i, x, y, luma_start = 0, luma_ctx = 3;
845 int nnz_pred, nnz, nnz_total = 0;
846 int segment = mb->segment;
849 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
850 nnz_pred = t_nnz[8] + l_nnz[8];
852 // decode DC values and do hadamard
853 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
854 s->qmat[segment].luma_dc_qmul);
855 l_nnz[8] = t_nnz[8] = !!nnz;
860 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
862 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
869 for (y = 0; y < 4; y++)
870 for (x = 0; x < 4; x++) {
871 nnz_pred = l_nnz[y] + t_nnz[x];
872 nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
873 nnz_pred, s->qmat[segment].luma_qmul);
874 // nnz+block_dc may be one more than the actual last index, but we don't care
875 td->non_zero_count_cache[y][x] = nnz + block_dc;
876 t_nnz[x] = l_nnz[y] = !!nnz;
881 // TODO: what to do about dimensions? 2nd dim for luma is x,
882 // but for chroma it's (y<<1)|x
883 for (i = 4; i < 6; i++)
884 for (y = 0; y < 2; y++)
885 for (x = 0; x < 2; x++) {
886 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
887 nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
888 nnz_pred, s->qmat[segment].chroma_qmul);
889 td->non_zero_count_cache[i][(y<<1)+x] = nnz;
890 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
894 // if there were no coded coeffs despite the macroblock not being marked skip,
895 // we MUST not do the inner loop filter and should not do IDCT
896 // Since skip isn't used for bitstream prediction, just manually set it.
901 static av_always_inline
902 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
903 int linesize, int uvlinesize, int simple)
905 AV_COPY128(top_border, src_y + 15*linesize);
907 AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
908 AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
912 static av_always_inline
913 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
914 int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
915 int simple, int xchg)
917 uint8_t *top_border_m1 = top_border-32; // for TL prediction
919 src_cb -= uvlinesize;
920 src_cr -= uvlinesize;
922 #define XCHG(a,b,xchg) do { \
923 if (xchg) AV_SWAP64(b,a); \
924 else AV_COPY64(b,a); \
927 XCHG(top_border_m1+8, src_y-8, xchg);
928 XCHG(top_border, src_y, xchg);
929 XCHG(top_border+8, src_y+8, 1);
930 if (mb_x < mb_width-1)
931 XCHG(top_border+32, src_y+16, 1);
933 // only copy chroma for normal loop filter
934 // or to initialize the top row to 127
935 if (!simple || !mb_y) {
936 XCHG(top_border_m1+16, src_cb-8, xchg);
937 XCHG(top_border_m1+24, src_cr-8, xchg);
938 XCHG(top_border+16, src_cb, 1);
939 XCHG(top_border+24, src_cr, 1);
943 static av_always_inline
944 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
947 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
949 return mb_y ? mode : LEFT_DC_PRED8x8;
953 static av_always_inline
954 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
957 return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
959 return mb_y ? mode : HOR_PRED8x8;
963 static av_always_inline
964 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
966 if (mode == DC_PRED8x8) {
967 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
973 static av_always_inline
974 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
978 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
980 return !mb_y ? DC_127_PRED8x8 : mode;
982 return !mb_x ? DC_129_PRED8x8 : mode;
983 case PLANE_PRED8x8 /*TM*/:
984 return check_tm_pred8x8_mode(mode, mb_x, mb_y);
989 static av_always_inline
990 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
993 return mb_y ? VERT_VP8_PRED : DC_129_PRED;
995 return mb_y ? mode : HOR_VP8_PRED;
999 static av_always_inline
1000 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
1004 if (!mb_x && mb_y) {
1009 case DIAG_DOWN_LEFT_PRED:
1010 case VERT_LEFT_PRED:
1011 return !mb_y ? DC_127_PRED : mode;
1019 return !mb_x ? DC_129_PRED : mode;
1021 return check_tm_pred4x4_mode(mode, mb_x, mb_y);
1022 case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
1023 case DIAG_DOWN_RIGHT_PRED:
1024 case VERT_RIGHT_PRED:
1033 static av_always_inline
1034 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1035 VP8Macroblock *mb, int mb_x, int mb_y)
1037 AVCodecContext *avctx = s->avctx;
1038 int x, y, mode, nnz;
1041 // for the first row, we need to run xchg_mb_border to init the top edge to 127
1042 // otherwise, skip it if we aren't going to deblock
1043 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1044 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1045 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1046 s->filter.simple, 1);
1048 if (mb->mode < MODE_I4x4) {
1049 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
1050 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
1052 mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
1054 s->hpc.pred16x16[mode](dst[0], s->linesize);
1056 uint8_t *ptr = dst[0];
1057 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1058 uint8_t tr_top[4] = { 127, 127, 127, 127 };
1060 // all blocks on the right edge of the macroblock use bottom edge
1061 // the top macroblock for their topright edge
1062 uint8_t *tr_right = ptr - s->linesize + 16;
1064 // if we're on the right edge of the frame, said edge is extended
1065 // from the top macroblock
1066 if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
1067 mb_x == s->mb_width-1) {
1068 tr = tr_right[-1]*0x01010101u;
1069 tr_right = (uint8_t *)&tr;
1073 AV_ZERO128(td->non_zero_count_cache);
1075 for (y = 0; y < 4; y++) {
1076 uint8_t *topright = ptr + 4 - s->linesize;
1077 for (x = 0; x < 4; x++) {
1078 int copy = 0, linesize = s->linesize;
1079 uint8_t *dst = ptr+4*x;
1080 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
1082 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
1085 topright = tr_right;
1087 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
1088 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, ©);
1090 dst = copy_dst + 12;
1094 AV_WN32A(copy_dst+4, 127U * 0x01010101U);
1096 AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
1100 copy_dst[3] = ptr[4*x-s->linesize-1];
1107 copy_dst[35] = 129U;
1109 copy_dst[11] = ptr[4*x -1];
1110 copy_dst[19] = ptr[4*x+s->linesize -1];
1111 copy_dst[27] = ptr[4*x+s->linesize*2-1];
1112 copy_dst[35] = ptr[4*x+s->linesize*3-1];
1118 s->hpc.pred4x4[mode](dst, topright, linesize);
1120 AV_COPY32(ptr+4*x , copy_dst+12);
1121 AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
1122 AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
1123 AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
1126 nnz = td->non_zero_count_cache[y][x];
1129 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
1131 s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
1136 ptr += 4*s->linesize;
1141 if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1142 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
1144 mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
1146 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1147 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1149 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1150 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1151 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1152 s->filter.simple, 0);
// Per-subpel-phase pixel requirements, indexed by (mv component << 1) & 7.
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
// Luma motion compensation: fetch block_w x block_h pixels from the reference
// frame at the quarter-pel position (x_off,y_off)+mv and write them to dst,
// emulating picture edges when the filter footprint crosses them.
// NOTE(review): td (per-thread state providing edge_emu_buffer) is not listed
// in the doxygen parameters below.
1165 * @param s VP8 decoding context
1166 * @param dst target buffer for block data at block position
1167 * @param ref reference picture buffer at origin (0, 0)
1168 * @param mv motion vector (relative to block position) to get pixel data from
1169 * @param x_off horizontal position of block from origin (0, 0)
1170 * @param y_off vertical position of block from origin (0, 0)
1171 * @param block_w width of block (16, 8 or 4)
1172 * @param block_h height of block (always same as block_w)
1173 * @param width width of src/dst plane data
1174 * @param height height of src/dst plane data
1175 * @param linesize size of a single line of plane data, including padding
1176 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1178 static av_always_inline
1179 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1180 AVFrame *ref, const VP56mv *mv,
1181 int x_off, int y_off, int block_w, int block_h,
1182 int width, int height, int linesize,
1183 vp8_mc_func mc_func[3][3])
1185 uint8_t *src = ref->data[0];
// Luma MVs are in quarter-pel units; <<1 maps them onto the eighth-pel
// subpel_idx tables (only even phases can occur for luma).
1189 int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1190 int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
1192 x_off += mv->x >> 2;
1193 y_off += mv->y >> 2;
// Frame threading: wait until the reference frame has decoded every row this
// block (plus the subpel filter margin) reads from.
1196 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1197 src += y_off * linesize + x_off;
// If block + filter margin crosses a picture edge, build a padded copy in
// edge_emu_buffer and filter from there instead.
1198 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1199 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1200 s->dsp.emulated_edge_mc(td->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
1201 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1202 x_off - mx_idx, y_off - my_idx, width, height);
1203 src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1205 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
// Whole-pel MV fast path: plain copy, no filter margin needed.
1207 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1208 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
// Chroma MC: same scheme as vp8_mc_luma but operates on the U and V planes
// together (they share MV, geometry and edge handling).
1213 * chroma MC function
1215 * @param s VP8 decoding context
1216 * @param dst1 target buffer for block data at block position (U plane)
1217 * @param dst2 target buffer for block data at block position (V plane)
1218 * @param ref reference picture buffer at origin (0, 0)
1219 * @param mv motion vector (relative to block position) to get pixel data from
1220 * @param x_off horizontal position of block from origin (0, 0)
1221 * @param y_off vertical position of block from origin (0, 0)
1222 * @param block_w width of block (16, 8 or 4)
1223 * @param block_h height of block (always same as block_w)
1224 * @param width width of src/dst plane data
1225 * @param height height of src/dst plane data
1226 * @param linesize size of a single line of plane data, including padding
1227 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1229 static av_always_inline
1230 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
1231 AVFrame *ref, const VP56mv *mv, int x_off, int y_off,
1232 int block_w, int block_h, int width, int height, int linesize,
1233 vp8_mc_func mc_func[3][3])
1235 uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
// Chroma MVs are already in eighth-pel units; low 3 bits select the phase.
1238 int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1239 int my = mv->y&7, my_idx = subpel_idx[0][my];
1241 x_off += mv->x >> 3;
1242 y_off += mv->y >> 3;
1245 src1 += y_off * linesize + x_off;
1246 src2 += y_off * linesize + x_off;
// >>3: chroma planes are half-height, so 8 chroma rows per macroblock row.
1247 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1248 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1249 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
// Edge emulation: U and V are padded one after the other, reusing the
// single edge_emu_buffer (src1 is consumed before src2 overwrites it).
1250 s->dsp.emulated_edge_mc(td->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
1251 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1252 x_off - mx_idx, y_off - my_idx, width, height);
1253 src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1254 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1256 s->dsp.emulated_edge_mc(td->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
1257 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1258 x_off - mx_idx, y_off - my_idx, width, height);
1259 src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1260 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1262 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1263 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
// Whole-pel fast path for both chroma planes.
1266 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1267 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1268 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
// Motion-compensate one partition of a macroblock: the luma block at
// (bx_off,by_off) within the MB, then the corresponding half-size chroma
// block, selecting the put_pixels function by block size.
1272 static av_always_inline
1273 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1274 AVFrame *ref_frame, int x_off, int y_off,
1275 int bx_off, int by_off,
1276 int block_w, int block_h,
1277 int width, int height, VP56mv *mv)
1282 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1283 ref_frame, mv, x_off + bx_off, y_off + by_off,
1284 block_w, block_h, width, height, s->linesize,
1285 s->put_pixels_tab[block_w == 8]);
// NOTE(review): profile 3 adjusts the derived chroma MV here (body elided in
// this view) — presumably rounding it to full-pel; confirm against spec.
1288 if (s->profile == 3) {
// Chroma geometry is half the luma geometry in both dimensions.
1292 x_off >>= 1; y_off >>= 1;
1293 bx_off >>= 1; by_off >>= 1;
1294 width >>= 1; height >>= 1;
1295 block_w >>= 1; block_h >>= 1;
1296 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1297 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1298 &uvmv, x_off + bx_off, y_off + by_off,
1299 block_w, block_h, width, height, s->uvlinesize,
1300 s->put_pixels_tab[1 + (block_w == 4)]);
1303 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1304 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1305 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1307 /* Don't prefetch refs that haven't been used very often this frame. */
1308 if (s->ref_count[ref-1] > (mb_xy >> 5)) {
1309 int x_off = mb_x << 4, y_off = mb_y << 4;
// +8: aim at the middle of the MB; mv is quarter-pel, hence >>2.
1310 int mx = (mb->mv.x>>2) + x_off + 8;
1311 int my = (mb->mv.y>>2) + y_off;
1312 uint8_t **src= s->framep[ref]->data;
// +64 / (mb_x&3)*4: stagger the prefetch 4 MBs ahead across cache lines.
1313 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1314 /* For threading, a ff_thread_await_progress here might be useful, but
1315 * it actually slows down the decoder. Since a bad prefetch doesn't
1316 * generate bad decoder output, we don't run it here. */
1317 s->dsp.prefetch(src[0]+off, s->linesize, 4);
// Chroma: half-resolution offsets; U and V are prefetched together using
// the V-U plane distance as the stride.
1318 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1319 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1324 * Apply motion vectors to prediction buffer, chapter 18.
1326 static av_always_inline
1327 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1328 VP8Macroblock *mb, int mb_x, int mb_y)
1330 int x_off = mb_x << 4, y_off = mb_y << 4;
1331 int width = 16*s->mb_width, height = 16*s->mb_height;
1332 AVFrame *ref = s->framep[mb->ref_frame];
1333 VP56mv *bmv = mb->bmv;
// Dispatch on the macroblock's MV partitioning (none / 4x4 / 16x8 / 8x16 / 8x8).
1335 switch (mb->partitioning) {
1336 case VP8_SPLITMVMODE_NONE:
1337 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1338 0, 0, 16, 16, width, height, &mb->mv);
1340 case VP8_SPLITMVMODE_4x4: {
// 4x4 split: sixteen 4x4 luma blocks, each with its own sub-MV.
1345 for (y = 0; y < 4; y++) {
1346 for (x = 0; x < 4; x++) {
1347 vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
1349 4*x + x_off, 4*y + y_off, 4, 4,
1350 width, height, s->linesize,
1351 s->put_pixels_tab[2]);
// Chroma runs at half resolution: each 4x4 chroma block covers a 2x2 group
// of luma sub-MVs, whose sum is averaged below.
1356 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1357 for (y = 0; y < 2; y++) {
1358 for (x = 0; x < 2; x++) {
1359 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1360 mb->bmv[ 2*y * 4 + 2*x+1].x +
1361 mb->bmv[(2*y+1) * 4 + 2*x ].x +
1362 mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1363 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1364 mb->bmv[ 2*y * 4 + 2*x+1].y +
1365 mb->bmv[(2*y+1) * 4 + 2*x ].y +
1366 mb->bmv[(2*y+1) * 4 + 2*x+1].y;
// Divide the 4-MV sum by 4 with a rounding bias; the sign-bit term
// (x >> (INT_BIT-1) is -1 for negative x) adjusts rounding for
// negative sums so the result rounds toward zero.
1367 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1368 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
1369 if (s->profile == 3) {
1373 vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
1374 dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1375 4*x + x_off, 4*y + y_off, 4, 4,
1376 width, height, s->uvlinesize,
1377 s->put_pixels_tab[2]);
1382 case VP8_SPLITMVMODE_16x8:
1383 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1384 0, 0, 16, 8, width, height, &bmv[0]);
1385 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1386 0, 8, 16, 8, width, height, &bmv[1]);
1388 case VP8_SPLITMVMODE_8x16:
1389 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1390 0, 0, 8, 16, width, height, &bmv[0]);
1391 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1392 8, 0, 8, 16, width, height, &bmv[1]);
1394 case VP8_SPLITMVMODE_8x8:
1395 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1396 0, 0, 8, 8, width, height, &bmv[0]);
1397 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1398 8, 0, 8, 8, width, height, &bmv[1]);
1399 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1400 0, 8, 8, 8, width, height, &bmv[2]);
1401 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1402 8, 8, 8, 8, width, height, &bmv[3]);
// Apply the inverse DCT/WHT residual to the predicted macroblock.
// non_zero_count_cache holds one byte per 4x4 block; 1 means only the DC
// coefficient is non-zero (cheap dc_add path), >1 means a full idct_add.
1407 static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
1408 uint8_t *dst[3], VP8Macroblock *mb)
1412 if (mb->mode != MODE_I4x4) {
1413 uint8_t *y_dst = dst[0];
1414 for (y = 0; y < 4; y++) {
// Load the 4 nnz bytes of this row of luma blocks at once.
1415 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
// Any byte > 1? Then per-block dispatch; otherwise all 4 are DC-only
// and the batched dc_add4y below handles the row in one call.
1417 if (nnz4&~0x01010101) {
1418 for (x = 0; x < 4; x++) {
1419 if ((uint8_t)nnz4 == 1)
1420 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
1421 else if((uint8_t)nnz4 > 1)
1422 s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
1428 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1431 y_dst += 4*s->linesize;
// Chroma: rows 4 (U) and 5 (V) of the nnz cache, same DC-only fast path.
1435 for (ch = 0; ch < 2; ch++) {
1436 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
1438 uint8_t *ch_dst = dst[1+ch];
1439 if (nnz4&~0x01010101) {
1440 for (y = 0; y < 2; y++) {
1441 for (x = 0; x < 2; x++) {
1442 if ((uint8_t)nnz4 == 1)
1443 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1444 else if((uint8_t)nnz4 > 1)
1445 s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1448 goto chroma_idct_end;
1450 ch_dst += 4*s->uvlinesize;
1453 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
// Compute the loop-filter strength parameters for one macroblock
// (RFC 6386 chapter 15): base level from segmentation or the frame-level
// filter, optional per-reference/per-mode deltas, then the interior limit
// derived from the sharpness setting.
1460 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
1462 int interior_limit, filter_level;
1464 if (s->segmentation.enabled) {
// Per-segment level, either absolute or a delta on the frame level.
1465 filter_level = s->segmentation.filter_level[mb->segment];
1466 if (!s->segmentation.absolute_vals)
1467 filter_level += s->filter.level;
1469 filter_level = s->filter.level;
1471 if (s->lf_delta.enabled) {
// Loop-filter deltas keyed on reference frame and prediction mode.
1472 filter_level += s->lf_delta.ref[mb->ref_frame];
1473 filter_level += s->lf_delta.mode[mb->mode];
// Filter level is a 6-bit quantity (0..63).
1476 filter_level = av_clip_uintp2(filter_level, 6);
1478 interior_limit = filter_level;
1479 if (s->filter.sharpness) {
1480 interior_limit >>= (s->filter.sharpness + 3) >> 2;
1481 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1483 interior_limit = FFMAX(interior_limit, 1);
1485 f->filter_level = filter_level;
1486 f->inner_limit = interior_limit;
// Inner (sub-block) edges are filtered unless the MB is a skipped
// whole-block prediction (no residual, single MV).
1487 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
// Normal (non-simple) loop filter for one macroblock: filters the left and
// top MB edges with the stronger mbedge limit, and the interior 4-pixel
// edges with the block-edge limit, on luma and both chroma planes.
1490 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
1492 int mbedge_lim, bedge_lim, hev_thresh;
1493 int filter_level = f->filter_level;
1494 int inner_limit = f->inner_limit;
1495 int inner_filter = f->inner_filter;
1496 int linesize = s->linesize;
1497 int uvlinesize = s->uvlinesize;
// High-edge-variance threshold by filter level; keyframes ([1]) use a
// slightly different mapping than interframes ([0])... indexed s->keyframe.
1498 static const uint8_t hev_thresh_lut[2][64] = {
1499 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1500 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1501 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1503 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1504 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1505 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1512 bedge_lim = 2*filter_level + inner_limit;
1513 mbedge_lim = bedge_lim + 4;
1515 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
// Left macroblock edge (horizontal filtering across the vertical edge).
1518 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
1519 mbedge_lim, inner_limit, hev_thresh);
1520 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
1521 mbedge_lim, inner_limit, hev_thresh);
// Interior vertical edges at x = 4, 8, 12 (luma) and x = 4 (chroma).
1525 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
1526 inner_limit, hev_thresh);
1527 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
1528 inner_limit, hev_thresh);
1529 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
1530 inner_limit, hev_thresh);
1531 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
1532 uvlinesize, bedge_lim,
1533 inner_limit, hev_thresh);
// Top macroblock edge (vertical filtering across the horizontal edge).
1537 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
1538 mbedge_lim, inner_limit, hev_thresh);
1539 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
1540 mbedge_lim, inner_limit, hev_thresh);
// Interior horizontal edges at y = 4, 8, 12 (luma) and y = 4 (chroma).
1544 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
1545 linesize, bedge_lim,
1546 inner_limit, hev_thresh);
1547 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
1548 linesize, bedge_lim,
1549 inner_limit, hev_thresh);
1550 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
1551 linesize, bedge_lim,
1552 inner_limit, hev_thresh);
1553 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
1554 dst[2] + 4 * uvlinesize,
1555 uvlinesize, bedge_lim,
1556 inner_limit, hev_thresh);
// Simple loop filter: luma-only variant of filter_mb with the same edge
// layout (MB edge with mbedge limit, interior edges at 4/8/12 with the
// block-edge limit) but no high-edge-variance logic and no chroma.
1560 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
1562 int mbedge_lim, bedge_lim;
1563 int filter_level = f->filter_level;
1564 int inner_limit = f->inner_limit;
1565 int inner_filter = f->inner_filter;
1566 int linesize = s->linesize;
1571 bedge_lim = 2*filter_level + inner_limit;
1572 mbedge_lim = bedge_lim + 4;
// Left MB edge, then interior vertical edges.
1575 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
1577 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1578 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1579 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
// Top MB edge, then interior horizontal edges.
1583 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
1585 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1586 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1587 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
1591 static void release_queued_segmaps(VP8Context *s, int is_close)
1593 int leave_behind = is_close ? 0 : !s->maps_are_invalid;
1594 while (s->num_maps_to_be_freed > leave_behind)
1595 av_freep(&s->segmentation_maps[--s->num_maps_to_be_freed]);
1596 s->maps_are_invalid = 0;
// MV clamping margin, in quarter-pel units (16 pixels).
1599 #define MARGIN (16 << 2)
// Pre-pass used by frame threading: decode all MB modes/MVs for the frame
// (layout 1 keeps MBs in macroblocks_base with a one-MB guard border) so
// the next frame can start before coefficient decoding finishes.
1600 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, AVFrame *curframe,
1601 AVFrame *prev_frame)
1603 VP8Context *s = avctx->priv_data;
1606 s->mv_min.y = -MARGIN;
1607 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1608 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
// +1 borders: row 0 and column 0 of macroblocks_base are guard MBs.
1609 VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1610 int mb_xy = mb_y*s->mb_width;
1612 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1614 s->mv_min.x = -MARGIN;
1615 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1616 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
// Seed the above-row intra modes with DC_PRED for the top guard MB.
1618 AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
// prev_frame's segmentation map (if any) seeds map prediction.
1619 decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
1620 prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 1);
// Sliced-threading synchronization: each row-decoding thread publishes its
// progress as (mb_y << 16 | mb_x) in td->thread_mb_pos. check_thread_pos()
// blocks on otd's condition variable until thread otd has passed the given
// (mb_x_check, mb_y_check); update_pos() publishes this thread's position
// and broadcasts to wake any thread waiting on it. The second pair of
// definitions (bottom) are the no-op stubs for builds without threading.
1630 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
1632 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);\
1633 if (otd->thread_mb_pos < tmp) {\
1634 pthread_mutex_lock(&otd->lock);\
1635 td->wait_mb_pos = tmp;\
1637 if (otd->thread_mb_pos >= tmp)\
1639 pthread_cond_wait(&otd->cond, &otd->lock);\
1641 td->wait_mb_pos = INT_MAX;\
1642 pthread_mutex_unlock(&otd->lock);\
1646 #define update_pos(td, mb_y, mb_x)\
1648 int pos = (mb_y << 16) | (mb_x & 0xFFFF);\
1649 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
1650 int is_null = (next_td == NULL) || (prev_td == NULL);\
1651 int pos_check = (is_null) ? 1 :\
1652 (next_td != td && pos >= next_td->wait_mb_pos) ||\
1653 (prev_td != td && pos >= prev_td->wait_mb_pos);\
1654 td->thread_mb_pos = pos;\
1655 if (sliced_threading && pos_check) {\
1656 pthread_mutex_lock(&td->lock);\
1657 pthread_cond_broadcast(&td->cond);\
1658 pthread_mutex_unlock(&td->lock);\
1662 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
1663 #define update_pos(td, mb_y, mb_x)
// Decode one macroblock row (modes, coefficients, prediction, IDCT) without
// loop filtering; runs as an execute2() job, with the row selected by
// td->thread_mb_pos. Synchronizes with neighbouring row threads via
// check_thread_pos()/update_pos().
1666 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
1667 int jobnr, int threadnr)
1669 VP8Context *s = avctx->priv_data;
1670 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
1671 int mb_y = td->thread_mb_pos>>16;
1672 int i, y, mb_x, mb_xy = mb_y*s->mb_width;
1673 int num_jobs = s->num_jobs;
1674 AVFrame *curframe = s->curframe, *prev_frame = s->prev_frame;
// Token partitions are assigned to rows round-robin.
1675 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
1678 curframe->data[0] + 16*mb_y*s->linesize,
1679 curframe->data[1] + 8*mb_y*s->uvlinesize,
1680 curframe->data[2] + 8*mb_y*s->uvlinesize
// Neighbouring-row threads to synchronize with (self at frame edges).
1682 if (mb_y == 0) prev_td = td;
1683 else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1684 if (mb_y == s->mb_height-1) next_td = td;
1685 else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
// Layout 1: MBs for the whole frame (modes pre-decoded); otherwise a
// 2-row ring in s->macroblocks.
1686 if (s->mb_layout == 1)
1687 mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1689 mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1690 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
1691 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1694 memset(td->left_nnz, 0, sizeof(td->left_nnz));
1695 // left edge of 129 for intra prediction
1696 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1697 for (i = 0; i < 3; i++)
1698 for (y = 0; y < 16>>!!i; y++)
1699 dst[i][y*curframe->linesize[i]-1] = 129;
1701 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
1705 s->mv_min.x = -MARGIN;
1706 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1708 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1709 // Wait for previous thread to read mb_x+2, and reach mb_y-1.
1710 if (prev_td != td) {
1711 if (threadnr != 0) {
1712 check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
1714 check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
1718 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1719 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1722 decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
1723 prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 0);
1725 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
1728 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);
1730 if (mb->mode <= MODE_I4x4)
1731 intra_predict(s, td, dst, mb, mb_x, mb_y);
1733 inter_predict(s, td, dst, mb, mb_x, mb_y);
1735 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
1738 idct_mb(s, td, dst, mb);
1740 AV_ZERO64(td->left_nnz);
1741 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
1743 // Reset DC block predictors if they would exist if the mb had coefficients
1744 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
1745 td->left_nnz[8] = 0;
1746 s->top_nnz[mb_x][8] = 0;
1750 if (s->deblock_filter)
1751 filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);
// The last job backs up the border here; with a single job the filter
// pass does it instead (see vp8_filter_mb_row).
1753 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
1754 if (s->filter.simple)
1755 backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1757 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1760 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
1768 if (mb_x == s->mb_width+1) {
1769 update_pos(td, mb_y, s->mb_width+3);
1771 update_pos(td, mb_y, mb_x);
// Loop-filter one macroblock row using the per-MB strengths computed in
// vp8_decode_mb_row_no_filter; waits for the previous row to be decoded far
// enough ahead and for the next row not to overrun this one.
1776 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
1777 int jobnr, int threadnr)
1779 VP8Context *s = avctx->priv_data;
1780 VP8ThreadData *td = &s->thread_data[threadnr];
1781 int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
1782 AVFrame *curframe = s->curframe;
1784 VP8ThreadData *prev_td, *next_td;
1786 curframe->data[0] + 16*mb_y*s->linesize,
1787 curframe->data[1] + 8*mb_y*s->uvlinesize,
1788 curframe->data[2] + 8*mb_y*s->uvlinesize
1791 if (s->mb_layout == 1)
1792 mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1794 mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1796 if (mb_y == 0) prev_td = td;
1797 else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1798 if (mb_y == s->mb_height-1) next_td = td;
1799 else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
1801 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
1802 VP8FilterStrength *f = &td->filter_strength[mb_x];
// Filter positions are offset by (mb_width+3) in the progress counter so
// decode and filter progress of a row can share one (y,x) ordering.
1803 if (prev_td != td) {
1804 check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
1807 if (next_td != &s->thread_data[0]) {
1808 check_thread_pos(td, next_td, mb_x+1, mb_y+1);
// Single-job mode: the decode pass skipped the border backup, do it here.
1811 if (num_jobs == 1) {
1812 if (s->filter.simple)
1813 backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1815 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1818 if (s->filter.simple)
1819 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
1821 filter_mb(s, dst, f, mb_x, mb_y);
1826 update_pos(td, mb_y, (s->mb_width+3) + mb_x);
// Slice-threading job entry point: thread jobnr handles every num_jobs-th
// macroblock row (decode, then optionally filter), reporting frame-thread
// progress as rows complete.
1830 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
1831 int jobnr, int threadnr)
1833 VP8Context *s = avctx->priv_data;
1834 VP8ThreadData *td = &s->thread_data[jobnr];
1835 VP8ThreadData *next_td = NULL, *prev_td = NULL;
1836 AVFrame *curframe = s->curframe;
1837 int mb_y, num_jobs = s->num_jobs;
1838 td->thread_nr = threadnr;
1839 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
// NOTE(review): redundant — the loop condition already guarantees
// mb_y < s->mb_height here.
1840 if (mb_y >= s->mb_height) break;
1841 td->thread_mb_pos = mb_y<<16;
1842 vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
1843 if (s->deblock_filter)
1844 vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
// Mark the whole row done so waiters never block on it again.
1845 update_pos(td, mb_y, INT_MAX & 0xFFFF);
1850 if (avctx->active_thread_type == FF_THREAD_FRAME)
1851 ff_thread_report_progress(curframe, mb_y, 0);
// Decode one VP8 frame: parse the header, manage the reference-frame pool
// (last/golden/altref), allocate the output frame, run the sliced row
// decoder, and hand back the decoded picture unless the frame is invisible.
1857 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
1860 VP8Context *s = avctx->priv_data;
1861 int ret, i, referenced, num_jobs;
1862 enum AVDiscard skip_thresh;
1863 AVFrame *av_uninit(curframe), *prev_frame;
1865 release_queued_segmaps(s, 0);
1867 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
1870 prev_frame = s->framep[VP56_FRAME_CURRENT];
// A frame is "referenced" if any future frame may predict from it.
1872 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
1873 || s->update_altref == VP56_FRAME_CURRENT;
1875 skip_thresh = !referenced ? AVDISCARD_NONREF :
1876 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
1878 if (avctx->skip_frame >= skip_thresh) {
1880 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
1883 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
1885 // release no longer referenced frames
1886 for (i = 0; i < 5; i++)
1887 if (s->frames[i].data[0] &&
1888 &s->frames[i] != prev_frame &&
1889 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1890 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1891 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1892 vp8_release_frame(s, &s->frames[i], 1, 0);
1894 // find a free buffer
1895 for (i = 0; i < 5; i++)
1896 if (&s->frames[i] != prev_frame &&
1897 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1898 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1899 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1900 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
// With 5 frames and at most 4 pinned references this cannot happen.
1904 av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
1907 if (curframe->data[0])
1908 vp8_release_frame(s, curframe, 1, 0);
1910 // Given that arithmetic probabilities are updated every frame, it's quite likely
1911 // that the values we have on a random interframe are complete junk if we didn't
1912 // start decode on a keyframe. So just don't display anything rather than junk.
1913 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1914 !s->framep[VP56_FRAME_GOLDEN] ||
1915 !s->framep[VP56_FRAME_GOLDEN2])) {
1916 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1917 ret = AVERROR_INVALIDDATA;
1921 curframe->key_frame = s->keyframe;
1922 curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1923 curframe->reference = referenced ? 3 : 0;
1924 if ((ret = vp8_alloc_frame(s, curframe))) {
1925 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
1929 // check if golden and altref are swapped
// Build next_framep: update_* select which slot each logical reference
// points to after this frame.
1930 if (s->update_altref != VP56_FRAME_NONE) {
1931 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
1933 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
1935 if (s->update_golden != VP56_FRAME_NONE) {
1936 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
1938 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
1940 if (s->update_last) {
1941 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
1943 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
1945 s->next_framep[VP56_FRAME_CURRENT] = curframe;
// Frame threading: setup is done, the next frame thread may start.
1947 ff_thread_finish_setup(avctx);
1949 s->linesize = curframe->linesize[0];
1950 s->uvlinesize = curframe->linesize[1];
// 21 rows: enough for a 16-row block plus the sixtap filter margin.
1952 if (!s->thread_data[0].edge_emu_buffer)
1953 for (i = 0; i < MAX_THREADS; i++)
1954 s->thread_data[i].edge_emu_buffer = av_malloc(21*s->linesize);
1956 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1957 /* Zero macroblock structures for top/top-left prediction from outside the frame. */
1959 memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
1960 if (!s->mb_layout && s->keyframe)
1961 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
1963 // top edge of 127 for intra prediction
1964 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1965 s->top_border[0][15] = s->top_border[0][23] = 127;
1966 memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
1968 memset(s->ref_count, 0, sizeof(s->ref_count));
1971 // Make sure the previous frame has read its segmentation map,
1972 // if we re-use the same map.
1973 if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
1974 ff_thread_await_progress(prev_frame, 1, 0);
1976 if (s->mb_layout == 1)
1977 vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
1979 if (avctx->active_thread_type == FF_THREAD_FRAME)
// One job per token partition, capped by the thread count.
1982 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
1983 s->num_jobs = num_jobs;
1984 s->curframe = curframe;
1985 s->prev_frame = prev_frame;
1986 s->mv_min.y = -MARGIN;
1987 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1988 for (i = 0; i < MAX_THREADS; i++) {
1989 s->thread_data[i].thread_mb_pos = 0;
1990 s->thread_data[i].wait_mb_pos = INT_MAX;
1992 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);
1994 ff_thread_report_progress(curframe, INT_MAX, 0);
1995 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
1998 // if future frames don't use the updated probabilities,
1999 // reset them to the values we saved
2000 if (!s->update_probabilities)
2001 s->prob[0] = s->prob[1];
2003 if (!s->invisible) {
2004 *(AVFrame*)data = *curframe;
2005 *data_size = sizeof(AVFrame);
2010 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
// Decoder init: VP8 is always 4:2:0 8-bit; reuse the H.264 intra prediction
// code (shared prediction modes) and set up the DSP function tables.
2014 static av_cold int vp8_decode_init(AVCodecContext *avctx)
2016 VP8Context *s = avctx->priv_data;
2019 avctx->pix_fmt = PIX_FMT_YUV420P;
2021 ff_dsputil_init(&s->dsp, avctx);
2022 ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8, 1);
2023 ff_vp8dsp_init(&s->vp8dsp);
// Decoder close: flush/free all frame buffers and drop every queued
// segmentation map (is_close=1 keeps nothing behind).
2028 static av_cold int vp8_decode_free(AVCodecContext *avctx)
2030 vp8_decode_flush_impl(avctx, 0, 1, 1);
2031 release_queued_segmaps(avctx->priv_data, 1);
// Frame-threading: per-thread context initialization (body elided in this
// view beyond fetching the private context).
2035 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2037 VP8Context *s = avctx->priv_data;
// Translate a frame pointer from the source context's frames[] array into
// the equivalent slot of the destination context (NULL stays NULL).
2044 #define REBASE(pic) \
2045 pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
// Frame-threading: copy the inter-frame decoder state (probabilities,
// segmentation, filter deltas, reference frames) from the previous frame
// thread's context; invalidate queued segmentation maps on a size change.
2047 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
2049 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2051 if (s->macroblocks_base &&
2052 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2054 s->maps_are_invalid = 1;
2055 s->mb_width = s_src->mb_width;
2056 s->mb_height = s_src->mb_height;
// Carry over the probabilities future frames will actually predict from.
2059 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2060 s->segmentation = s_src->segmentation;
2061 s->lf_delta = s_src->lf_delta;
2062 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2064 memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
2065 s->framep[0] = REBASE(s_src->next_framep[0]);
2066 s->framep[1] = REBASE(s_src->next_framep[1]);
2067 s->framep[2] = REBASE(s_src->next_framep[2]);
2068 s->framep[3] = REBASE(s_src->next_framep[3]);
// Codec registration: VP8 supports direct rendering plus both frame- and
// slice-based multithreading.
2073 AVCodec ff_vp8_decoder = {
2075 .type = AVMEDIA_TYPE_VIDEO,
2077 .priv_data_size = sizeof(VP8Context),
2078 .init = vp8_decode_init,
2079 .close = vp8_decode_free,
2080 .decode = vp8_decode_frame,
2081 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2082 .flush = vp8_decode_flush,
2083 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2084 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2085 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),