/*
 * VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Jason Garrett-Glaser
 * Copyright (C) 2012 Daniel Kang
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "libavutil/imgutils.h"
#include "avcodec.h"
#include "internal.h"
#include "vp8.h"
#include "vp8data.h"
#include "rectangle.h"
#include "thread.h"

#if ARCH_ARM
#   include "arm/vp8.h"
#endif
static void free_buffers(VP8Context *s)
{
    int i;
    if (s->thread_data)
        for (i = 0; i < MAX_THREADS; i++) {
            av_freep(&s->thread_data[i].filter_strength);
            av_freep(&s->thread_data[i].edge_emu_buffer);
        }
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}
static int vp8_alloc_frame(VP8Context *s, AVFrame *f)
{
    int ret;
    if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0)
        return ret;
    if (s->num_maps_to_be_freed && !s->maps_are_invalid) {
        f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed];
    } else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) {
        ff_thread_release_buffer(s->avctx, f);
        return AVERROR(ENOMEM);
    }
    return 0;
}
static void vp8_release_frame(VP8Context *s, AVFrame *f, int prefer_delayed_free, int can_direct_free)
{
    if (f->ref_index[0]) {
        if (prefer_delayed_free) {
            /* Upon a size change, we want to free the maps but other threads may still
             * be using them, so queue them. Upon a seek, all threads are inactive, so
             * we want to cache one to prevent re-allocation in the next decoding
             * iteration, but the rest we can free directly. */
            int max_queued_maps = can_direct_free ? 1 : FF_ARRAY_ELEMS(s->segmentation_maps);
            if (s->num_maps_to_be_freed < max_queued_maps) {
                s->segmentation_maps[s->num_maps_to_be_freed++] = f->ref_index[0];
            } else if (can_direct_free) /* vp8_decode_flush(), but our queue is full */ {
                av_free(f->ref_index[0]);
            } /* else: MEMLEAK (should never happen, but better that than a crash) */
            f->ref_index[0] = NULL;
        } else /* vp8_decode_free() */ {
            av_free(f->ref_index[0]);
        }
    }
    ff_thread_release_buffer(s->avctx, f);
}
static void vp8_decode_flush_impl(AVCodecContext *avctx,
                                  int prefer_delayed_free, int can_direct_free, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int i;

    if (!avctx->internal->is_copy) {
        for (i = 0; i < 5; i++)
            if (s->frames[i].data[0])
                vp8_release_frame(s, &s->frames[i], prefer_delayed_free, can_direct_free);
    }
    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem) {
        free_buffers(s);
        s->maps_are_invalid = 1;
    }
}

static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 1, 1, 0);
}
static int update_dimensions(VP8Context *s, int width, int height)
{
    AVCodecContext *avctx = s->avctx;
    int i;

    if (width  != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
        height != s->avctx->height) {
        if (av_image_check_size(width, height, 0, s->avctx))
            return AVERROR_INVALIDDATA;

        vp8_decode_flush_impl(s->avctx, 1, 0, 1);

        avcodec_set_dimensions(s->avctx, width, height);
    }

    s->mb_width  = (s->avctx->coded_width +15) / 16;
    s->mb_height = (s->avctx->coded_height+15) / 16;

    s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
    }
    else // Sliced threading
        s->macroblocks_base = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
    s->top_nnz     = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
    s->top_border  = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
    s->thread_data = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
#if HAVE_THREADS
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
    }

    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        (!s->intra4x4_pred_mode_top && !s->mb_layout))
        return AVERROR(ENOMEM);

    s->macroblocks = s->macroblocks_base + 1;

    return 0;
}
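/* Editor's note (added commentary, not upstream): the two allocations above
 * reflect two macroblock layouts. With sliced threading, a full
 * (mb_width+2) x (mb_height+2) grid is kept so any thread can read its top
 * and top-left neighbors directly. Otherwise rows are stored overlapping,
 * each row starting 2 entries before the previous one, so the macroblock
 * above the current one is always at mb[2]; decode_splitmvs() and
 * decode_mvs() below rely on exactly that when layout == 0. */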
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map = vp8_rac_get(c);

    if (vp8_rac_get(c)) { // update segment feature data
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    buf      += 3*(s->num_coeff_partitions-1);
    buf_size -= 3*(s->num_coeff_partitions-1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions-1; i++) {
        int size = AV_RL24(sizes + 3*i);
        if (buf_size - size < 0)
            return -1;

        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        buf      += size;
        buf_size -= size;
    }
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}
static void get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;
        } else
            base_qi = yac_qi;

        s->qmat[i].luma_qmul[0]    =           vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
        s->qmat[i].luma_qmul[1]    =           vp8_ac_qlookup[av_clip_uintp2(base_qi             , 7)];
        s->qmat[i].luma_dc_qmul[0] =       2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[i].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
        s->qmat[i].chroma_qmul[0]  =           vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  =           vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];

        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}
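/* Editor's note (added commentary, not upstream): the fixed-point scale above
 * works out as follows: 155/100 = 1.55 and 1.55 * 65536 = 101580.8, so
 * (101581 * q) >> 16 is q scaled by 1.55 in integer arithmetic. E.g. for an
 * AC lookup value of q = 100 this yields (101581 * 100) >> 16 = 155, matching
 * the spec's 155/100 scaling of the second-order luma quantizer, before the
 * FFMAX clamp to a minimum of 8. */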
/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
 *
 * Intra frames update all 3 references
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
 * If the update (golden|altref) flag is set, it's updated with the current frame
 *      if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 *      0: no update
 *      1: VP56_FRAME_PREVIOUS
 *      2: update golden with altref, or update altref with golden
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;

    if (update)
        return VP56_FRAME_CURRENT;

    switch (vp8_rac_get_uint(c, 2)) {
    case 1:
        return VP56_FRAME_PREVIOUS;
    case 2:
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    }
    return VP56_FRAME_NONE;
}
static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}
static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, i, j, k, l, m, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7*s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf+3) & 0x3fff;
        height = AV_RL16(buf+5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            av_log_missing_feature(s->avctx, "Upscaling", 1);
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        for (i = 0; i < 4; i++)
            for (j = 0; j < 16; j++)
                memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                       sizeof(s->prob->token[i][j]));
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }
    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height || (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) {
        if ((ret = update_dimensions(s, width, height)) < 0)
            return ret;
    }

    get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);

        if (vp8_rac_get(c))
            for (i = 0; i < 4; i++)
                s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
        if (vp8_rac_get(c))
            for (i = 0; i < 3; i++)
                s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);

        // 17.2 MV probability update
        for (i = 0; i < 2; i++)
            for (j = 0; j < 19; j++)
                if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                    s->prob->mvc[i][j] = vp8_rac_get_nn(c);
    }

    return 0;
}
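/* Editor's note (added commentary, not upstream): the 3-byte frame tag parsed
 * at the top of decode_frame_header packs, little-endian: bit 0 = inverse
 * keyframe flag, bits 1-3 = profile, bit 4 = show_frame, bits 5-23 = size of
 * the first ("header") partition. Keyframes append a further 7 bytes: the
 * 0x9d 0x01 0x2a start code (read as 0x2a019d by AV_RL24) and two 16-bit
 * dimension fields whose top 2 bits carry the upscale codes, which is why
 * the header-size check above allows for buf_size - 7*s->keyframe. */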
static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
{
    dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
    dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
}

/**
 * Motion vector coding, 17.1.
 */
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int i;

        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = 9; i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p+2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3 * bit;
        x  += 4 * bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2 * bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}
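/* Editor's note (added commentary, not upstream): the "large" branch above
 * codes magnitudes of 8 and up: low bits 0-2 arrive first, then bits 9 down
 * to 4. Bit 3 is implied to be 1 when no higher bit is set (otherwise the
 * value would be below 8 and the small branch would have been used), and is
 * read with probability p[12] otherwise. The small branch walks a short tree
 * over p[2..8] for magnitudes 0-7; p[1] codes the sign of either branch. */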
static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
{
    if (left == top)
        return vp8_submv_prob[4-!!left];
    if (!top)
        return vp8_submv_prob[2];
    return vp8_submv_prob[1-!!left];
}
/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
{
    int part_idx;
    int n, num;
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
                  *mbsplits_top,
                  *mbsplits_cur, *firstidx;
    VP56mv *top_mv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width-1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv       = top_mb->bmv;

    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        } else {
            part_idx = VP8_SPLITMVMODE_8x8;
        }
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num          = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx];
    firstidx     = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
                    mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}
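/* Editor's note (added commentary, not upstream): vp8_mbsplits maps each of
 * the 16 4x4 sub-blocks to its partition number, so the 16x8/8x16/8x8 modes
 * reuse the same neighbor lookup as 4x4: the "left" MV of sub-block k comes
 * from the rightmost column of the left MB (index k+3 in its map) or from
 * the previous sub-block of this MB, and "above" from the bottom row of the
 * top MB (k+12) or the sub-block one row up (k-4). */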
static av_always_inline
void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
                                  mb - 1 /* left */,
                                  0      /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    }
    else {
        mb_edge[0] = mb - s->mb_width-1;
        mb_edge[2] = mb - s->mb_width-2;
    }

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
#define MV_EDGE_CHECK(n)\
    {\
        VP8Macroblock *edge = mb_edge[n];\
        int edge_ref = edge->ref_frame;\
        if (edge_ref != VP56_FRAME_CURRENT) {\
            uint32_t mv = AV_RN32A(&edge->mv);\
            if (mv) {\
                if (cur_sign_bias != sign_bias[edge_ref]) {\
                    /* SWAR negate of the values in mv. */\
                    mv = ~mv;\
                    mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
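                    /* Editor's note (added commentary, not upstream): the\
                     * two lines above negate both int16 halves of mv at\
                     * once: ~mv plus 1 per lane is two's complement; the\
                     * 0x7fff7fff mask stops the +1 carry at bit 15 so it\
                     * cannot spill into the upper lane, and XOR-ing the\
                     * saved sign bits back in completes the addition. */\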
                }\
                if (!n || mv != AV_RN32A(&near_mv[idx]))\
                    AV_WN32A(&near_mv[++idx], mv);\
                cnt[idx]      += 1 + (n != 2);\
            } else {\
                cnt[CNT_ZERO] += 1 + (n != 2);\
            }\
        }\
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {

                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);

                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
                } else {
                    mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
{
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

    if (layout == 1) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
    if (keyframe) {
        int x, y;
        uint8_t *top;
        uint8_t* const left = s->intra4x4_pred_mode_left;
        if (layout == 1)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y] = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
    }
}
static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout)
{
    VP56RangeCoder *c = &s->c;

    if (s->segmentation.update_map) {
        int bit  = vp56_rac_get_prob(c, s->prob->segmentid[0]);
        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
    } else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        } else {
            const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
            if (s->mb_layout == 1)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A( s->intra4x4_pred_mode_left, modes);
        }

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
        mb->ref_frame = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
                VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame-1]++;

        // motion vectors, 16.3
        decode_mvs(s, mb, mb_x, mb_y, layout);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
        mb->ref_frame = VP56_FRAME_CURRENT;
        mb->partitioning = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}
#ifndef decode_block_coeffs_internal
/**
 * @param r arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static int decode_block_coeffs_internal(VP56RangeCoder *r, DCTELEM block[16],
                                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
                                        int i, uint8_t *token_prob, int16_t qmul[2])
{
    VP56RangeCoder c = *r;
    goto skip_eob;
    do {
        int coeff;
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i+1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff  = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else {                                             // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else {    // DCT_CAT3 and up
                    int a = vp56_rac_get_prob(&c, token_prob[8]);
                    int b = vp56_rac_get_prob(&c, token_prob[9+a]);
                    int cat = (a<<1) + b;
                    coeff  = 3 + (8<<cat);
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i+1][2];
        }
        block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}
#endif

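/* Editor's note (added commentary, not upstream): the category tokens above
 * cover increasing magnitude ranges: DCT_CAT1 codes 5-6 and DCT_CAT2 codes
 * 7-10, while the (a<<1)+b index selects CAT3-CAT6 with bases following
 * 3 + (8<<cat), i.e. 11, 19, 35 and 67; vp8_rac_get_coeff() then reads the
 * remaining offset bits using ff_vp8_dct_cat_prob[cat]. */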
/**
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
                        int i, int zero_nhood, int16_t qmul[2])
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
    return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
}
static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
                      uint8_t t_nnz[9], uint8_t l_nnz[9])
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;
    int block_dc = 0;

    if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
                                  s->qmat[segment].luma_dc_qmul);
        l_nnz[8] = t_nnz[8] = !!nnz;
        if (nnz) {
            nnz_total += nnz;
            block_dc = 1;
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
        }
        luma_start = 1;
        luma_ctx = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
                                      nnz_pred, s->qmat[segment].luma_qmul);
            // nnz+block_dc may be one more than the actual last index, but we don't care
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
                nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
                                          nnz_pred, s->qmat[segment].chroma_qmul);
                td->non_zero_count_cache[i][(y<<1)+x] = nnz;
                t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}
static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                      int linesize, int uvlinesize, int simple)
{
    AV_COPY128(top_border, src_y + 15*linesize);
    if (!simple) {
        AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
        AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
    }
}
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                    int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
                    int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border-32;     // for TL prediction
    src_y  -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a,b,xchg) do {            \
        if (xchg) AV_SWAP64(b,a);      \
        else      AV_COPY64(b,a);      \
    } while (0)

    XCHG(top_border_m1+8, src_y-8, xchg);
    XCHG(top_border,      src_y,   xchg);
    XCHG(top_border+8,    src_y+8, 1);
    if (mb_x < mb_width-1)
        XCHG(top_border+32, src_y+16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1+16, src_cb-8, xchg);
        XCHG(top_border_m1+24, src_cr-8, xchg);
        XCHG(top_border+16, src_cb, 1);
        XCHG(top_border+24, src_cr, 1);
    }
}
static av_always_inline
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x)
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
    else
        return mb_y ? mode : LEFT_DC_PRED8x8;
}

static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x)
        return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
    else
        return mb_y ? mode : HOR_PRED8x8;
}

static av_always_inline
int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (mode == DC_PRED8x8) {
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    } else {
        return mode;
    }
}

static av_always_inline
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
{
    switch (mode) {
    case DC_PRED8x8:
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    case VERT_PRED8x8:
        return !mb_y ? DC_127_PRED8x8 : mode;
    case HOR_PRED8x8:
        return !mb_x ? DC_129_PRED8x8 : mode;
    case PLANE_PRED8x8 /*TM*/:
        return check_tm_pred8x8_mode(mode, mb_x, mb_y);
    }
    return mode;
}
static av_always_inline
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x)
        return mb_y ? VERT_VP8_PRED : DC_129_PRED;
    else
        return mb_y ? mode : HOR_VP8_PRED;
}

static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
{
    switch (mode) {
    case VERT_PRED:
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? DC_127_PRED : mode;
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? DC_129_PRED : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y);
    case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
    }
    return mode;
}
static av_always_inline
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    AVCodecContext *avctx = s->avctx;
    int x, y, mode, nnz;
    uint32_t tr;

    // for the first row, we need to run xchg_mb_border to init the top edge to 127
    // otherwise, skip it if we aren't going to deblock
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
            mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
        } else {
            mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
        }
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        uint8_t tr_top[4] = { 127, 127, 127, 127 };

        // all blocks on the right edge of the macroblock use the bottom edge
        // of the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
            mb_x == s->mb_width-1) {
            tr = tr_right[-1]*0x01010101u;
            tr_right = (uint8_t *)&tr;
        }

        if (mb->skip)
            AV_ZERO128(td->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr+4*x;
                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];

                if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
                    mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
                    if (copy) {
                        dst = copy_dst + 12;
                        linesize = 8;
                        if (!(mb_y + y)) {
                            copy_dst[3] = 127U;
                            AV_WN32A(copy_dst+4, 127U * 0x01010101U);
                        } else {
                            AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
                            if (!(mb_x + x)) {
                                copy_dst[3] = 129U;
                            } else {
                                copy_dst[3] = ptr[4*x-s->linesize-1];
                            }
                        }
                        if (!(mb_x + x)) {
                            copy_dst[11] =
                            copy_dst[19] =
                            copy_dst[27] =
                            copy_dst[35] = 129U;
                        } else {
                            copy_dst[11] = ptr[4*x              -1];
                            copy_dst[19] = ptr[4*x+s->linesize  -1];
                            copy_dst[27] = ptr[4*x+s->linesize*2-1];
                            copy_dst[35] = ptr[4*x+s->linesize*3-1];
                        }
                    }
                } else {
                    mode = intra4x4[x];
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    AV_COPY32(ptr+4*x              , copy_dst+12);
                    AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
                    AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
                    AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
                }

                nnz = td->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr      += 4*s->linesize;
            intra4x4 += 4;
        }
    }

    if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
        mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
    } else {
        mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
    }
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
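/* Editor's note (added commentary, not upstream): the rows above are indexed
 * by the 3-bit subpel position. The odd (eighth-step) positions use filters
 * whose outermost taps are zero, so they only need 1 extra pixel left and 2
 * right of the block; the even positions need the full six-tap window
 * (2 left, 3 right). Row 1 is simply row 0 plus row 2 — the total margin
 * that emulated_edge_mc has to provide. */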
/**
 * luma MC function
 *
 * @param s VP8 decoding context
 * @param dst target buffer for block data at block position
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
                 AVFrame *ref, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, int linesize,
                 vp8_mc_func mc_func[3][3])
{
    uint8_t *src = ref->data[0];

    if (AV_RN32A(mv)) {

        int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        // edge emulation
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->dsp.emulated_edge_mc(td->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
            src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}
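/* Editor's note (added commentary, not upstream): the ff_thread_await_progress
 * calls above convert the bottom-most luma row this block can reference
 * (y_off + block_h, plus the filter's bottom margin) into a macroblock row
 * via >> 4, so frame threads only wait until the reference frame has decoded
 * that far rather than the whole frame; the +3 appears to be safety slack
 * for the reference's trailing loop filter. */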
/**
 * chroma MC function
 *
 * @param s VP8 decoding context
 * @param dst1 target buffer for block data at block position (U plane)
 * @param dst2 target buffer for block data at block position (V plane)
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
                   AVFrame *ref, const VP56mv *mv, int x_off, int y_off,
                   int block_w, int block_h, int width, int height, int linesize,
                   vp8_mc_func mc_func[3][3])
{
    uint8_t *src1 = ref->data[1], *src2 = ref->data[2];

    if (AV_RN32A(mv)) {
        int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
        int my = mv->y&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->dsp.emulated_edge_mc(td->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
            src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);

            s->dsp.emulated_edge_mc(td->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
            src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}
static av_always_inline
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                 AVFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off,
                 int block_w, int block_h,
                 int width, int height, VP56mv *mv)
{
    VP56mv uvmv = *mv;

    /* Y */
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
                ref_frame, mv, x_off + bx_off, y_off + by_off,
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);

    /* U/V */
    if (s->profile == 3) {
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    x_off   >>= 1; y_off   >>= 1;
    bx_off  >>= 1; by_off  >>= 1;
    width   >>= 1; height  >>= 1;
    block_w >>= 1; block_h >>= 1;
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
}
/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines. Inspired by ffh264's prefetch_motion. */
static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref-1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4;
        int mx = (mb->mv.x>>2) + x_off + 8;
        int my = (mb->mv.y>>2) + y_off;
        uint8_t **src= s->framep[ref]->data;
        int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
        off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
    }
}
/**
 * Apply motion vectors to prediction buffer, chapter 18.
 */
static av_always_inline
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16*s->mb_width, height = 16*s->mb_height;
    AVFrame *ref = s->framep[mb->ref_frame];
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
        break;
    case VP8_SPLITMVMODE_4x4: {
        int x, y;
        VP56mv uvmv;

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
                            ref, &bmv[4*y + x],
                            4*x + x_off, 4*y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
            }
        }

        /* U/V */
        x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
                         mb->bmv[ 2*y    * 4 + 2*x+1].x +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].x +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].x;
                uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
                         mb->bmv[ 2*y    * 4 + 2*x+1].y +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].y +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].y;
                uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
                uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
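                /* Editor's note (added commentary, not upstream): this is a
                 * rounded average of the four luma MVs covering each chroma
                 * block: adding 2 rounds the >>2 to nearest, and adding the
                 * sign bit (v >> (INT_BIT-1) is -1 for negative v) biases
                 * negative sums so the rounding stays symmetric around 0. */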
                if (s->profile == 3) {
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
                vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
                              dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
                              4*x + x_off, 4*y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
            }
        }
        break;
    }
    case VP8_SPLITMVMODE_16x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x16:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
        break;
    }
}
static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
                                     uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
            if (nnz4) {
                if (nnz4&~0x01010101) {
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;
                    }
                } else {
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
                }
            }
            y_dst += 4*s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1+ch];
            if (nnz4&~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            goto chroma_idct_end;
                    }
                    ch_dst += 4*s->uvlinesize;
                }
            } else {
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
            }
        }
chroma_idct_end: ;
    }
}
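/* Editor's note (added commentary, not upstream): AV_RL32 in idct_mb packs
 * the four per-block coefficient counts of a row into one word, one count
 * per byte. nnz4 & ~0x01010101 is then a single test for "any block has more
 * than a DC coefficient": if it is false, every byte is 0 or 1 and the whole
 * row can take the batched DC-only vp8_idct_dc_add4y/4uv path. */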
static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[mb->segment];
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];
        filter_level += s->lf_delta.mode[mb->mode];
    }

    filter_level = av_clip_uintp2(filter_level, 6);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit = interior_limit;
    f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
}
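/* Editor's note (added commentary, not upstream): a worked example for the
 * sharpness adjustment above: with filter_level 40 and sharpness 5, the
 * interior limit is first shifted by (5+3)>>2 = 2, giving 10, then capped at
 * 9 - 5 = 4. Higher sharpness therefore weakens the in-macroblock ("inner")
 * filtering while the macroblock-edge limits derived from filter_level in
 * filter_mb() below are unaffected. */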
static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;
    int uvlinesize = s->uvlinesize;
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };

    if (!filter_level)
        return;

    bedge_lim  = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0],     linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }

    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }
}
static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;

    if (!filter_level)
        return;

    bedge_lim  = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
    }
    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
    }
}
static void release_queued_segmaps(VP8Context *s, int is_close)
{
    int leave_behind = is_close ? 0 : !s->maps_are_invalid;
    while (s->num_maps_to_be_freed > leave_behind)
        av_freep(&s->segmentation_maps[--s->num_maps_to_be_freed]);
    s->maps_are_invalid = 0;
}
#define MARGIN (16 << 2)
static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, AVFrame *curframe,
                                   AVFrame *prev_frame)
{
    VP8Context *s = avctx->priv_data;
    int mb_x, mb_y;

    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
        int mb_xy = mb_y*s->mb_width;

        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);

        s->mv_min.x = -MARGIN;
        s->mv_max.x = ((s->mb_width  - 1) << 6) + MARGIN;
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            if (mb_y == 0)
                AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
                           prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 1);
            s->mv_min.x -= 64;
            s->mv_max.x -= 64;
        }
        s->mv_min.y -= 64;
        s->mv_max.y -= 64;
    }
}

#if HAVE_THREADS
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
    do {\
        int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);\
        if (otd->thread_mb_pos < tmp) {\
            pthread_mutex_lock(&otd->lock);\
            td->wait_mb_pos = tmp;\
            do {\
                if (otd->thread_mb_pos >= tmp)\
                    break;\
                pthread_cond_wait(&otd->cond, &otd->lock);\
            } while (1);\
            td->wait_mb_pos = INT_MAX;\
            pthread_mutex_unlock(&otd->lock);\
        }\
    } while(0);

#define update_pos(td, mb_y, mb_x)\
    do {\
        int pos              = (mb_y << 16) | (mb_x & 0xFFFF);\
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
        int is_null          = (next_td == NULL) || (prev_td == NULL);\
        int pos_check        = (is_null) ? 1 :\
                               (next_td != td && pos >= next_td->wait_mb_pos) ||\
                               (prev_td != td && pos >= prev_td->wait_mb_pos);\
        td->thread_mb_pos = pos;\
        if (sliced_threading && pos_check) {\
            pthread_mutex_lock(&td->lock);\
            pthread_cond_broadcast(&td->cond);\
            pthread_mutex_unlock(&td->lock);\
        }\
    } while(0);
#else
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)
#define update_pos(td, mb_y, mb_x)
#endif

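/* Editor's note (added commentary, not upstream): thread progress is encoded
 * as a single int, (mb_y << 16) | mb_x, so "has the neighbor thread reached
 * macroblock (x,y)?" is one integer compare in the fast path; the mutex and
 * condition variable above are only touched when a thread actually has to
 * block waiting on its neighbor row. */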
static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
    int mb_y = td->thread_mb_pos>>16;
    int i, y, mb_x, mb_xy = mb_y*s->mb_width;
    int num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe, *prev_frame = s->prev_frame;
    VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->data[0] + 16*mb_y*s->linesize,
        curframe->data[1] +  8*mb_y*s->uvlinesize,
        curframe->data[2] +  8*mb_y*s->uvlinesize
    };
    if (mb_y == 0) prev_td = td;
    else           prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    if (mb_y == s->mb_height-1) next_td = td;
    else                        next_td = &s->thread_data[(jobnr + 1)%num_jobs];
    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
    else {
        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
        memset(mb - 1, 0, sizeof(*mb));   // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
    }

    memset(td->left_nnz, 0, sizeof(td->left_nnz));
    // left edge of 129 for intra prediction
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        for (i = 0; i < 3; i++)
            for (y = 0; y < 16>>!!i; y++)
                dst[i][y*curframe->linesize[i]-1] = 129;
        if (mb_y == 1) // top left edge is also 129
            s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
    }

    s->mv_min.x = -MARGIN;
    s->mv_max.x = ((s->mb_width  - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        // Wait for previous thread to read mb_x+2, and reach mb_y-1.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
            } else {
                check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
            }
        }

        s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
        s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);

        if (!s->mb_layout)
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
                           prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 0);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (!mb->skip) {
            idct_mb(s, td, dst, mb);
        } else {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned

            // Reset DC block predictors if they would exist if the mb had coefficients
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8]     = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);

        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        s->mv_min.x -= 64;
        s->mv_max.x -= 64;

        if (mb_x == s->mb_width+1) {
            update_pos(td, mb_y, s->mb_width+3);
        } else {
            update_pos(td, mb_y, mb_x);
        }
    }
}
static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe;
    VP8Macroblock *mb;
    VP8ThreadData *prev_td, *next_td;
    uint8_t *dst[3] = {
        curframe->data[0] + 16*mb_y*s->linesize,
        curframe->data[1] +  8*mb_y*s->uvlinesize,
        curframe->data[2] +  8*mb_y*s->uvlinesize
    };

    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
    else
        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;

    if (mb_y == 0) prev_td = td;
    else           prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    if (mb_y == s->mb_height-1) next_td = td;
    else                        next_td = &s->thread_data[(jobnr + 1)%num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];
        if (prev_td != td) {
            check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
        }

        if (next_td != &s->thread_data[0]) {
            check_thread_pos(td, next_td, mb_x+1, mb_y+1);
        }

        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        else
            filter_mb(s, dst, f, mb_x, mb_y);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width+3) + mb_x);
    }
}
static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[jobnr];
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
    AVFrame *curframe = s->curframe;
    int mb_y, num_jobs = s->num_jobs;
    td->thread_nr = threadnr;
    for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
        if (mb_y >= s->mb_height) break;
        td->thread_mb_pos = mb_y<<16;
        vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
        if (s->deblock_filter)
            vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
        update_pos(td, mb_y, INT_MAX & 0xFFFF);

        s->mv_min.y -= 64;
        s->mv_max.y -= 64;

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            ff_thread_report_progress(curframe, mb_y, 0);
    }

    return 0;
}
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
                            AVPacket *avpkt)
{
    VP8Context *s = avctx->priv_data;
    int ret, i, referenced, num_jobs;
    enum AVDiscard skip_thresh;
    AVFrame *av_uninit(curframe), *prev_frame;

    release_queued_segmaps(s, 0);

    if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
        goto err;

    prev_frame = s->framep[VP56_FRAME_CURRENT];

    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
                                || s->update_altref == VP56_FRAME_CURRENT;

    skip_thresh = !referenced ? AVDISCARD_NONREF :
                    !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
        goto skip_decode;
    }
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // release no longer referenced frames
    for (i = 0; i < 5; i++)
        if (s->frames[i].data[0] &&
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            vp8_release_frame(s, &s->frames[i], 1, 0);

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
            break;
        }
    if (i == 5) {
        av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (curframe->data[0])
        vp8_release_frame(s, curframe, 1, 0);

    // Given that arithmetic probabilities are updated every frame, it's quite likely
    // that the values we have on a random interframe are complete junk if we didn't
    // start decode on a keyframe. So just don't display anything rather than junk.
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN] ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
        goto err;
    }

    curframe->key_frame = s->keyframe;
    curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
    curframe->reference = referenced ? 3 : 0;
    if ((ret = vp8_alloc_frame(s, curframe))) {
        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
        goto err;
    }

    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[s->update_altref];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[VP56_FRAME_GOLDEN2];
    }
    if (s->update_golden != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[s->update_golden];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[VP56_FRAME_GOLDEN];
    }
    if (s->update_last) {
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    } else {
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
    }
    s->next_framep[VP56_FRAME_CURRENT]      = curframe;

    ff_thread_finish_setup(avctx);

    s->linesize   = curframe->linesize[0];
    s->uvlinesize = curframe->linesize[1];

    if (!s->thread_data[0].edge_emu_buffer)
        for (i = 0; i < MAX_THREADS; i++)
            s->thread_data[i].edge_emu_buffer = av_malloc(21*s->linesize);

    memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
    /* Zero macroblock structures for top/top-left prediction from outside the frame. */
    if (!s->mb_layout)
        memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
    if (!s->mb_layout && s->keyframe)
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);

    // top edge of 127 for intra prediction
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        s->top_border[0][15] = s->top_border[0][23] = 127;
        memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
    }
    memset(s->ref_count, 0, sizeof(s->ref_count));

    // Make sure the previous frame has read its segmentation map,
    // if we re-use the same map.
    if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
        ff_thread_await_progress(prev_frame, 1, 0);

    if (s->mb_layout == 1)
        vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);

    if (avctx->active_thread_type == FF_THREAD_FRAME)
        num_jobs = 1;
    else
        num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
    s->num_jobs   = num_jobs;
    s->curframe   = curframe;
    s->prev_frame = prev_frame;
    s->mv_min.y   = -MARGIN;
    s->mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].thread_mb_pos = 0;
        s->thread_data[i].wait_mb_pos = INT_MAX;
    }
    avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);

    ff_thread_report_progress(curframe, INT_MAX, 0);
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
        *(AVFrame*)data = *curframe;
        *data_size = sizeof(AVFrame);
    }

    return avpkt->size;
err:
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
    return ret;
}
static av_cold int vp8_decode_init(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;

    s->avctx = avctx;
    avctx->pix_fmt = AV_PIX_FMT_YUV420P;

    ff_dsputil_init(&s->dsp, avctx);
    ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
    ff_vp8dsp_init(&s->vp8dsp);

    return 0;
}
static av_cold int vp8_decode_free(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0, 1, 1);
    release_queued_segmaps(avctx->priv_data, 1);
    return 0;
}
static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;

    s->avctx = avctx;

    return 0;
}
#define REBASE(pic) \
    pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL

static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;

    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        free_buffers(s);
        s->maps_are_invalid = 1;
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta     = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
    s->framep[0] = REBASE(s_src->next_framep[0]);
    s->framep[1] = REBASE(s_src->next_framep[1]);
    s->framep[2] = REBASE(s_src->next_framep[2]);
    s->framep[3] = REBASE(s_src->next_framep[3]);

    return 0;
}
AVCodec ff_vp8_decoder = {
    .name                  = "vp8",
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP8,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = vp8_decode_init,
    .close                 = vp8_decode_free,
    .decode                = vp8_decode_frame,
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
    .flush                 = vp8_decode_flush,
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
};