/*
 * VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Jason Garrett-Glaser
 * Copyright (C) 2012 Daniel Kang
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
26 #include "libavutil/imgutils.h"
31 #include "rectangle.h"
38 static void free_buffers(VP8Context *s)
42 for (i = 0; i < MAX_THREADS; i++) {
44 pthread_cond_destroy(&s->thread_data[i].cond);
45 pthread_mutex_destroy(&s->thread_data[i].lock);
47 av_freep(&s->thread_data[i].filter_strength);
48 av_freep(&s->thread_data[i].edge_emu_buffer);
50 av_freep(&s->thread_data);
51 av_freep(&s->macroblocks_base);
52 av_freep(&s->intra4x4_pred_mode_top);
53 av_freep(&s->top_nnz);
54 av_freep(&s->top_border);
56 s->macroblocks = NULL;
59 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
62 if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
63 ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
65 if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
66 ff_thread_release_buffer(s->avctx, &f->tf);
67 return AVERROR(ENOMEM);
72 static void vp8_release_frame(VP8Context *s, VP8Frame *f)
74 av_buffer_unref(&f->seg_map);
75 ff_thread_release_buffer(s->avctx, &f->tf);
78 static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
82 vp8_release_frame(s, dst);
84 if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
87 !(dst->seg_map = av_buffer_ref(src->seg_map))) {
88 vp8_release_frame(s, dst);
89 return AVERROR(ENOMEM);
96 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
98 VP8Context *s = avctx->priv_data;
101 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
102 vp8_release_frame(s, &s->frames[i]);
103 memset(s->framep, 0, sizeof(s->framep));
109 static void vp8_decode_flush(AVCodecContext *avctx)
111 vp8_decode_flush_impl(avctx, 0);
114 static int update_dimensions(VP8Context *s, int width, int height)
116 AVCodecContext *avctx = s->avctx;
119 if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
120 height != s->avctx->height) {
121 if (av_image_check_size(width, height, 0, s->avctx))
122 return AVERROR_INVALIDDATA;
124 vp8_decode_flush_impl(s->avctx, 1);
126 avcodec_set_dimensions(s->avctx, width, height);
129 s->mb_width = (s->avctx->coded_width +15) / 16;
130 s->mb_height = (s->avctx->coded_height+15) / 16;
132 s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
133 if (!s->mb_layout) { // Frame threading and one thread
134 s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
135 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
137 else // Sliced threading
138 s->macroblocks_base = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
139 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
140 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
141 s->thread_data = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));
143 for (i = 0; i < MAX_THREADS; i++) {
144 s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
146 pthread_mutex_init(&s->thread_data[i].lock, NULL);
147 pthread_cond_init(&s->thread_data[i].cond, NULL);
151 if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
152 (!s->intra4x4_pred_mode_top && !s->mb_layout))
153 return AVERROR(ENOMEM);
155 s->macroblocks = s->macroblocks_base + 1;
160 static void parse_segment_info(VP8Context *s)
162 VP56RangeCoder *c = &s->c;
165 s->segmentation.update_map = vp8_rac_get(c);
167 if (vp8_rac_get(c)) { // update segment feature data
168 s->segmentation.absolute_vals = vp8_rac_get(c);
170 for (i = 0; i < 4; i++)
171 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
173 for (i = 0; i < 4; i++)
174 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
176 if (s->segmentation.update_map)
177 for (i = 0; i < 3; i++)
178 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
181 static void update_lf_deltas(VP8Context *s)
183 VP56RangeCoder *c = &s->c;
186 for (i = 0; i < 4; i++) {
187 if (vp8_rac_get(c)) {
188 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
191 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
195 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
196 if (vp8_rac_get(c)) {
197 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
200 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
205 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
207 const uint8_t *sizes = buf;
210 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
212 buf += 3*(s->num_coeff_partitions-1);
213 buf_size -= 3*(s->num_coeff_partitions-1);
217 for (i = 0; i < s->num_coeff_partitions-1; i++) {
218 int size = AV_RL24(sizes + 3*i);
219 if (buf_size - size < 0)
222 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
226 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
231 static void get_quants(VP8Context *s)
233 VP56RangeCoder *c = &s->c;
236 int yac_qi = vp8_rac_get_uint(c, 7);
237 int ydc_delta = vp8_rac_get_sint(c, 4);
238 int y2dc_delta = vp8_rac_get_sint(c, 4);
239 int y2ac_delta = vp8_rac_get_sint(c, 4);
240 int uvdc_delta = vp8_rac_get_sint(c, 4);
241 int uvac_delta = vp8_rac_get_sint(c, 4);
243 for (i = 0; i < 4; i++) {
244 if (s->segmentation.enabled) {
245 base_qi = s->segmentation.base_quant[i];
246 if (!s->segmentation.absolute_vals)
251 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
252 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)];
253 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
254 /* 101581>>16 is equivalent to 155/100 */
255 s->qmat[i].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
256 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
257 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
259 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
260 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
265 * Determine which buffers golden and altref should be updated with after this frame.
266 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
268 * Intra frames update all 3 references
269 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
270 * If the update (golden|altref) flag is set, it's updated with the current frame
271 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
272 * If the flag is not set, the number read means:
274 * 1: VP56_FRAME_PREVIOUS
275 * 2: update golden with altref, or update altref with golden
277 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
279 VP56RangeCoder *c = &s->c;
282 return VP56_FRAME_CURRENT;
284 switch (vp8_rac_get_uint(c, 2)) {
286 return VP56_FRAME_PREVIOUS;
288 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
290 return VP56_FRAME_NONE;
293 static void update_refs(VP8Context *s)
295 VP56RangeCoder *c = &s->c;
297 int update_golden = vp8_rac_get(c);
298 int update_altref = vp8_rac_get(c);
300 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
301 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
304 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
306 VP56RangeCoder *c = &s->c;
307 int header_size, hscale, vscale, i, j, k, l, m, ret;
308 int width = s->avctx->width;
309 int height = s->avctx->height;
311 s->keyframe = !(buf[0] & 1);
312 s->profile = (buf[0]>>1) & 7;
313 s->invisible = !(buf[0] & 0x10);
314 header_size = AV_RL24(buf) >> 5;
319 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
322 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
323 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
324 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
326 if (header_size > buf_size - 7*s->keyframe) {
327 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
328 return AVERROR_INVALIDDATA;
332 if (AV_RL24(buf) != 0x2a019d) {
333 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
334 return AVERROR_INVALIDDATA;
336 width = AV_RL16(buf+3) & 0x3fff;
337 height = AV_RL16(buf+5) & 0x3fff;
338 hscale = buf[4] >> 6;
339 vscale = buf[6] >> 6;
343 if (hscale || vscale)
344 avpriv_request_sample(s->avctx, "Upscaling");
346 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
347 for (i = 0; i < 4; i++)
348 for (j = 0; j < 16; j++)
349 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
350 sizeof(s->prob->token[i][j]));
351 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
352 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
353 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
354 memset(&s->segmentation, 0, sizeof(s->segmentation));
355 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
358 ff_vp56_init_range_decoder(c, buf, header_size);
360 buf_size -= header_size;
364 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
365 vp8_rac_get(c); // whether we can skip clamping in dsp functions
368 if ((s->segmentation.enabled = vp8_rac_get(c)))
369 parse_segment_info(s);
371 s->segmentation.update_map = 0; // FIXME: move this to some init function?
373 s->filter.simple = vp8_rac_get(c);
374 s->filter.level = vp8_rac_get_uint(c, 6);
375 s->filter.sharpness = vp8_rac_get_uint(c, 3);
377 if ((s->lf_delta.enabled = vp8_rac_get(c)))
381 if (setup_partitions(s, buf, buf_size)) {
382 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
383 return AVERROR_INVALIDDATA;
386 if (!s->macroblocks_base || /* first frame */
387 width != s->avctx->width || height != s->avctx->height || (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) {
388 if ((ret = update_dimensions(s, width, height)) < 0)
396 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
397 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
400 // if we aren't saving this frame's probabilities for future frames,
401 // make a copy of the current probabilities
402 if (!(s->update_probabilities = vp8_rac_get(c)))
403 s->prob[1] = s->prob[0];
405 s->update_last = s->keyframe || vp8_rac_get(c);
407 for (i = 0; i < 4; i++)
408 for (j = 0; j < 8; j++)
409 for (k = 0; k < 3; k++)
410 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
411 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
412 int prob = vp8_rac_get_uint(c, 8);
413 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
414 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
417 if ((s->mbskip_enabled = vp8_rac_get(c)))
418 s->prob->mbskip = vp8_rac_get_uint(c, 8);
421 s->prob->intra = vp8_rac_get_uint(c, 8);
422 s->prob->last = vp8_rac_get_uint(c, 8);
423 s->prob->golden = vp8_rac_get_uint(c, 8);
426 for (i = 0; i < 4; i++)
427 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
429 for (i = 0; i < 3; i++)
430 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
432 // 17.2 MV probability update
433 for (i = 0; i < 2; i++)
434 for (j = 0; j < 19; j++)
435 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
436 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
442 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
444 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
445 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
449 * Motion vector coding, 17.1.
451 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
455 if (vp56_rac_get_prob_branchy(c, p[0])) {
458 for (i = 0; i < 3; i++)
459 x += vp56_rac_get_prob(c, p[9 + i]) << i;
460 for (i = 9; i > 3; i--)
461 x += vp56_rac_get_prob(c, p[9 + i]) << i;
462 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
466 const uint8_t *ps = p+2;
467 bit = vp56_rac_get_prob(c, *ps);
470 bit = vp56_rac_get_prob(c, *ps);
473 x += vp56_rac_get_prob(c, *ps);
476 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
479 static av_always_inline
480 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
483 return vp8_submv_prob[4-!!left];
485 return vp8_submv_prob[2];
486 return vp8_submv_prob[1-!!left];
490 * Split motion vector prediction, 16.4.
491 * @returns the number of motion vectors parsed (2, 4 or 16)
493 static av_always_inline
494 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
498 VP8Macroblock *top_mb;
499 VP8Macroblock *left_mb = &mb[-1];
500 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
502 *mbsplits_cur, *firstidx;
504 VP56mv *left_mv = left_mb->bmv;
505 VP56mv *cur_mv = mb->bmv;
507 if (!layout) // layout is inlined, s->mb_layout is not
510 top_mb = &mb[-s->mb_width-1];
511 mbsplits_top = vp8_mbsplits[top_mb->partitioning];
512 top_mv = top_mb->bmv;
514 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
515 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
516 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
518 part_idx = VP8_SPLITMVMODE_8x8;
521 part_idx = VP8_SPLITMVMODE_4x4;
524 num = vp8_mbsplit_count[part_idx];
525 mbsplits_cur = vp8_mbsplits[part_idx],
526 firstidx = vp8_mbfirstidx[part_idx];
527 mb->partitioning = part_idx;
529 for (n = 0; n < num; n++) {
531 uint32_t left, above;
532 const uint8_t *submv_prob;
535 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
537 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
539 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
541 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
543 submv_prob = get_submv_prob(left, above);
545 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
546 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
547 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
548 mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
549 mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
551 AV_ZERO32(&mb->bmv[n]);
554 AV_WN32A(&mb->bmv[n], above);
557 AV_WN32A(&mb->bmv[n], left);
564 static av_always_inline
565 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
567 VP8Macroblock *mb_edge[3] = { 0 /* top */,
570 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
571 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
573 int cur_sign_bias = s->sign_bias[mb->ref_frame];
574 int8_t *sign_bias = s->sign_bias;
576 uint8_t cnt[4] = { 0 };
577 VP56RangeCoder *c = &s->c;
579 if (!layout) { // layout is inlined (s->mb_layout is not)
584 mb_edge[0] = mb - s->mb_width-1;
585 mb_edge[2] = mb - s->mb_width-2;
588 AV_ZERO32(&near_mv[0]);
589 AV_ZERO32(&near_mv[1]);
590 AV_ZERO32(&near_mv[2]);
592 /* Process MB on top, left and top-left */
593 #define MV_EDGE_CHECK(n)\
595 VP8Macroblock *edge = mb_edge[n];\
596 int edge_ref = edge->ref_frame;\
597 if (edge_ref != VP56_FRAME_CURRENT) {\
598 uint32_t mv = AV_RN32A(&edge->mv);\
600 if (cur_sign_bias != sign_bias[edge_ref]) {\
601 /* SWAR negate of the values in mv. */\
603 mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
605 if (!n || mv != AV_RN32A(&near_mv[idx]))\
606 AV_WN32A(&near_mv[++idx], mv);\
607 cnt[idx] += 1 + (n != 2);\
609 cnt[CNT_ZERO] += 1 + (n != 2);\
617 mb->partitioning = VP8_SPLITMVMODE_NONE;
618 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
619 mb->mode = VP8_MVMODE_MV;
621 /* If we have three distinct MVs, merge first and last if they're the same */
622 if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
623 cnt[CNT_NEAREST] += 1;
625 /* Swap near and nearest if necessary */
626 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
627 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
628 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
631 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
632 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
634 /* Choose the best mv out of 0,0 and the nearest mv */
635 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
636 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
637 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
638 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
640 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
641 mb->mode = VP8_MVMODE_SPLIT;
642 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
644 mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
645 mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
649 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
653 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
657 mb->mode = VP8_MVMODE_ZERO;
663 static av_always_inline
664 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
665 int mb_x, int keyframe, int layout)
667 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
670 VP8Macroblock *mb_top = mb - s->mb_width - 1;
671 memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
676 uint8_t* const left = s->intra4x4_pred_mode_left;
678 top = mb->intra4x4_pred_mode_top;
680 top = s->intra4x4_pred_mode_top + 4 * mb_x;
681 for (y = 0; y < 4; y++) {
682 for (x = 0; x < 4; x++) {
684 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
685 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
686 left[y] = top[x] = *intra4x4;
692 for (i = 0; i < 16; i++)
693 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
697 static av_always_inline
698 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
699 uint8_t *segment, uint8_t *ref, int layout)
701 VP56RangeCoder *c = &s->c;
703 if (s->segmentation.update_map) {
704 int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
705 *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
706 } else if (s->segmentation.enabled)
707 *segment = ref ? *ref : *segment;
708 mb->segment = *segment;
710 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
713 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
715 if (mb->mode == MODE_I4x4) {
716 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
718 const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
719 if (s->mb_layout == 1)
720 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
722 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
723 AV_WN32A( s->intra4x4_pred_mode_left, modes);
726 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
727 mb->ref_frame = VP56_FRAME_CURRENT;
728 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
730 if (vp56_rac_get_prob_branchy(c, s->prob->last))
731 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
732 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
734 mb->ref_frame = VP56_FRAME_PREVIOUS;
735 s->ref_count[mb->ref_frame-1]++;
737 // motion vectors, 16.3
738 decode_mvs(s, mb, mb_x, mb_y, layout);
741 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
743 if (mb->mode == MODE_I4x4)
744 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
746 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
747 mb->ref_frame = VP56_FRAME_CURRENT;
748 mb->partitioning = VP8_SPLITMVMODE_NONE;
749 AV_ZERO32(&mb->bmv[0]);
753 #ifndef decode_block_coeffs_internal
755 * @param r arithmetic bitstream reader context
756 * @param block destination for block coefficients
757 * @param probs probabilities to use when reading trees from the bitstream
758 * @param i initial coeff index, 0 unless a separate DC block is coded
759 * @param qmul array holding the dc/ac dequant factor at position 0/1
760 * @return 0 if no coeffs were decoded
761 * otherwise, the index of the last coeff decoded plus one
763 static int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
764 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
765 int i, uint8_t *token_prob, int16_t qmul[2])
767 VP56RangeCoder c = *r;
771 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
775 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
777 break; // invalid input; blocks should end with EOB
778 token_prob = probs[i][0];
782 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
784 token_prob = probs[i+1][1];
786 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
787 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
789 coeff += vp56_rac_get_prob(&c, token_prob[5]);
793 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
794 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
795 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
798 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
799 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
801 } else { // DCT_CAT3 and up
802 int a = vp56_rac_get_prob(&c, token_prob[8]);
803 int b = vp56_rac_get_prob(&c, token_prob[9+a]);
804 int cat = (a<<1) + b;
805 coeff = 3 + (8<<cat);
806 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
809 token_prob = probs[i+1][2];
811 block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
820 * @param c arithmetic bitstream reader context
821 * @param block destination for block coefficients
822 * @param probs probabilities to use when reading trees from the bitstream
823 * @param i initial coeff index, 0 unless a separate DC block is coded
824 * @param zero_nhood the initial prediction context for number of surrounding
825 * all-zero blocks (only left/top, so 0-2)
826 * @param qmul array holding the dc/ac dequant factor at position 0/1
827 * @return 0 if no coeffs were decoded
828 * otherwise, the index of the last coeff decoded plus one
830 static av_always_inline
831 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
832 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
833 int i, int zero_nhood, int16_t qmul[2])
835 uint8_t *token_prob = probs[i][zero_nhood];
836 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
838 return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
841 static av_always_inline
842 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
843 uint8_t t_nnz[9], uint8_t l_nnz[9])
845 int i, x, y, luma_start = 0, luma_ctx = 3;
846 int nnz_pred, nnz, nnz_total = 0;
847 int segment = mb->segment;
850 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
851 nnz_pred = t_nnz[8] + l_nnz[8];
853 // decode DC values and do hadamard
854 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
855 s->qmat[segment].luma_dc_qmul);
856 l_nnz[8] = t_nnz[8] = !!nnz;
861 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
863 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
870 for (y = 0; y < 4; y++)
871 for (x = 0; x < 4; x++) {
872 nnz_pred = l_nnz[y] + t_nnz[x];
873 nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
874 nnz_pred, s->qmat[segment].luma_qmul);
875 // nnz+block_dc may be one more than the actual last index, but we don't care
876 td->non_zero_count_cache[y][x] = nnz + block_dc;
877 t_nnz[x] = l_nnz[y] = !!nnz;
882 // TODO: what to do about dimensions? 2nd dim for luma is x,
883 // but for chroma it's (y<<1)|x
884 for (i = 4; i < 6; i++)
885 for (y = 0; y < 2; y++)
886 for (x = 0; x < 2; x++) {
887 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
888 nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
889 nnz_pred, s->qmat[segment].chroma_qmul);
890 td->non_zero_count_cache[i][(y<<1)+x] = nnz;
891 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
895 // if there were no coded coeffs despite the macroblock not being marked skip,
896 // we MUST not do the inner loop filter and should not do IDCT
897 // Since skip isn't used for bitstream prediction, just manually set it.
902 static av_always_inline
903 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
904 int linesize, int uvlinesize, int simple)
906 AV_COPY128(top_border, src_y + 15*linesize);
908 AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
909 AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
913 static av_always_inline
914 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
915 int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
916 int simple, int xchg)
918 uint8_t *top_border_m1 = top_border-32; // for TL prediction
920 src_cb -= uvlinesize;
921 src_cr -= uvlinesize;
923 #define XCHG(a,b,xchg) do { \
924 if (xchg) AV_SWAP64(b,a); \
925 else AV_COPY64(b,a); \
928 XCHG(top_border_m1+8, src_y-8, xchg);
929 XCHG(top_border, src_y, xchg);
930 XCHG(top_border+8, src_y+8, 1);
931 if (mb_x < mb_width-1)
932 XCHG(top_border+32, src_y+16, 1);
934 // only copy chroma for normal loop filter
935 // or to initialize the top row to 127
936 if (!simple || !mb_y) {
937 XCHG(top_border_m1+16, src_cb-8, xchg);
938 XCHG(top_border_m1+24, src_cr-8, xchg);
939 XCHG(top_border+16, src_cb, 1);
940 XCHG(top_border+24, src_cr, 1);
944 static av_always_inline
945 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
948 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
950 return mb_y ? mode : LEFT_DC_PRED8x8;
954 static av_always_inline
955 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
958 return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
960 return mb_y ? mode : HOR_PRED8x8;
964 static av_always_inline
965 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
967 if (mode == DC_PRED8x8) {
968 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
974 static av_always_inline
975 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
979 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
981 return !mb_y ? DC_127_PRED8x8 : mode;
983 return !mb_x ? DC_129_PRED8x8 : mode;
984 case PLANE_PRED8x8 /*TM*/:
985 return check_tm_pred8x8_mode(mode, mb_x, mb_y);
990 static av_always_inline
991 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
994 return mb_y ? VERT_VP8_PRED : DC_129_PRED;
996 return mb_y ? mode : HOR_VP8_PRED;
1000 static av_always_inline
1001 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
1005 if (!mb_x && mb_y) {
1010 case DIAG_DOWN_LEFT_PRED:
1011 case VERT_LEFT_PRED:
1012 return !mb_y ? DC_127_PRED : mode;
1020 return !mb_x ? DC_129_PRED : mode;
1022 return check_tm_pred4x4_mode(mode, mb_x, mb_y);
1023 case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
1024 case DIAG_DOWN_RIGHT_PRED:
1025 case VERT_RIGHT_PRED:
1034 static av_always_inline
1035 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1036 VP8Macroblock *mb, int mb_x, int mb_y)
1038 AVCodecContext *avctx = s->avctx;
1039 int x, y, mode, nnz;
1042 // for the first row, we need to run xchg_mb_border to init the top edge to 127
1043 // otherwise, skip it if we aren't going to deblock
1044 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1045 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1046 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1047 s->filter.simple, 1);
1049 if (mb->mode < MODE_I4x4) {
1050 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
1051 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
1053 mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
1055 s->hpc.pred16x16[mode](dst[0], s->linesize);
1057 uint8_t *ptr = dst[0];
1058 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1059 uint8_t tr_top[4] = { 127, 127, 127, 127 };
1061 // all blocks on the right edge of the macroblock use bottom edge
1062 // the top macroblock for their topright edge
1063 uint8_t *tr_right = ptr - s->linesize + 16;
1065 // if we're on the right edge of the frame, said edge is extended
1066 // from the top macroblock
1067 if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
1068 mb_x == s->mb_width-1) {
1069 tr = tr_right[-1]*0x01010101u;
1070 tr_right = (uint8_t *)&tr;
1074 AV_ZERO128(td->non_zero_count_cache);
1076 for (y = 0; y < 4; y++) {
1077 uint8_t *topright = ptr + 4 - s->linesize;
1078 for (x = 0; x < 4; x++) {
1079 int copy = 0, linesize = s->linesize;
1080 uint8_t *dst = ptr+4*x;
1081 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
1083 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
1086 topright = tr_right;
1088 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
1089 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, ©);
1091 dst = copy_dst + 12;
1095 AV_WN32A(copy_dst+4, 127U * 0x01010101U);
1097 AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
1101 copy_dst[3] = ptr[4*x-s->linesize-1];
1108 copy_dst[35] = 129U;
1110 copy_dst[11] = ptr[4*x -1];
1111 copy_dst[19] = ptr[4*x+s->linesize -1];
1112 copy_dst[27] = ptr[4*x+s->linesize*2-1];
1113 copy_dst[35] = ptr[4*x+s->linesize*3-1];
1119 s->hpc.pred4x4[mode](dst, topright, linesize);
1121 AV_COPY32(ptr+4*x , copy_dst+12);
1122 AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
1123 AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
1124 AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
1127 nnz = td->non_zero_count_cache[y][x];
1130 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
1132 s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
1137 ptr += 4*s->linesize;
1142 if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1143 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
1145 mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
1147 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1148 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1150 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1151 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1152 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1153 s->filter.simple, 0);
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
1166 * @param s VP8 decoding context
1167 * @param dst target buffer for block data at block position
1168 * @param ref reference picture buffer at origin (0, 0)
1169 * @param mv motion vector (relative to block position) to get pixel data from
1170 * @param x_off horizontal position of block from origin (0, 0)
1171 * @param y_off vertical position of block from origin (0, 0)
1172 * @param block_w width of block (16, 8 or 4)
1173 * @param block_h height of block (always same as block_w)
1174 * @param width width of src/dst plane data
1175 * @param height height of src/dst plane data
1176 * @param linesize size of a single line of plane data, including padding
1177 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
/* Luma motion compensation for one block: split the quarter-pel mv into an
 * integer offset plus a 3-bit subpel phase, wait (frame threading) until the
 * needed reference rows are decoded, route through emulated_edge_mc when the
 * filter taps would read outside the plane, then run the selected MC filter. */
1179 static av_always_inline
1180 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1181 ThreadFrame *ref, const VP56mv *mv,
1182 int x_off, int y_off, int block_w, int block_h,
1183 int width, int height, int linesize,
1184 vp8_mc_func mc_func[3][3])
1186 uint8_t *src = ref->f->data[0];
// Subpel phase in eighth-pel units; subpel_idx[0] selects the filter.
// NOTE(review): mv->x / mv->y may be negative, so `mv->x << 1` left-shifts
// a negative value, which is undefined behavior in C; `mv->x * 2` is safe.
1190 int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1191 int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
// Integer-pel displacement (mv is in quarter-pel units for luma).
1193 x_off += mv->x >> 2;
1194 y_off += mv->y >> 2;
// Wait until the reference frame has decoded every row the filter reads
// (block plus bottom filter margin, converted to macroblock rows).
1197 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1198 src += y_off * linesize + x_off;
// Any filter tap outside the plane: copy through the per-thread
// edge-padded scratch buffer first.
1199 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1200 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1201 s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
1202 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1203 x_off - mx_idx, y_off - my_idx, width, height);
1204 src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1206 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
// Zero-mv fast path: plain copy with a shorter progress wait.
// NOTE(review): the branch condition separating the two paths (the original
// tests the packed mv for non-zero) is elided from this listing.
1208 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1209 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
1214 * chroma MC function
1216 * @param s VP8 decoding context
1217 * @param dst1 target buffer for block data at block position (U plane)
1218 * @param dst2 target buffer for block data at block position (V plane)
1219 * @param ref reference picture buffer at origin (0, 0)
1220 * @param mv motion vector (relative to block position) to get pixel data from
1221 * @param x_off horizontal position of block from origin (0, 0)
1222 * @param y_off vertical position of block from origin (0, 0)
1223 * @param block_w width of block (16, 8 or 4)
1224 * @param block_h height of block (always same as block_w)
1225 * @param width width of src/dst plane data
1226 * @param height height of src/dst plane data
1227 * @param linesize size of a single line of plane data, including padding
1228 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
/* Chroma motion compensation: same scheme as vp8_mc_luma, but operates on
 * both the U (dst1/src1) and V (dst2/src2) planes with one eighth-pel mv.
 * The shared edge_emu_buffer is used for U first, then reused for V. */
1230 static av_always_inline
1231 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
1232 ThreadFrame *ref, const VP56mv *mv, int x_off, int y_off,
1233 int block_w, int block_h, int width, int height, int linesize,
1234 vp8_mc_func mc_func[3][3])
1236 uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
// Chroma mvs are already in eighth-pel units: low 3 bits are the phase.
1239 int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1240 int my = mv->y&7, my_idx = subpel_idx[0][my];
1241 // integer-pel displacement
1242 x_off += mv->x >> 3;
1243 y_off += mv->y >> 3;
1246 src1 += y_off * linesize + x_off;
1247 src2 += y_off * linesize + x_off;
// Chroma planes are half-height, hence >> 3 for the macroblock-row index.
1248 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1249 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1250 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
// U plane via the edge scratch buffer...
1251 s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
1252 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1253 x_off - mx_idx, y_off - my_idx, width, height);
1254 src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1255 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
// ...then V, reusing the same scratch buffer (safe because U is done).
1257 s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
1258 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1259 x_off - mx_idx, y_off - my_idx, width, height);
1260 src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1261 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
// In-bounds case: filter straight from the reference planes.
// NOTE(review): the `} else {` separators of the original are elided here.
1263 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1264 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
// Zero-mv fast path: plain copy for both planes.
1267 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1268 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1269 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
/* Motion-compensate one partition of a macroblock: luma at full resolution,
 * then chroma at half resolution using a chroma mv derived from *mv (the
 * declaration/derivation of `uvmv` is elided from this listing). */
1273 static av_always_inline
1274 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1275 ThreadFrame *ref_frame, int x_off, int y_off,
1276 int bx_off, int by_off,
1277 int block_w, int block_h,
1278 int width, int height, VP56mv *mv)
// Y: put_pixels_tab[0] is 16-wide MC, [1] is 8-wide.
1283 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1284 ref_frame, mv, x_off + bx_off, y_off + by_off,
1285 block_w, block_h, width, height, s->linesize,
1286 s->put_pixels_tab[block_w == 8]);
// Profile 3 uses full-pel chroma mvs; the masking of the fractional bits
// of uvmv belongs here but is elided from this listing.
1289 if (s->profile == 3) {
// U/V: everything shrinks by 2 because chroma is subsampled 4:2:0.
1293 x_off >>= 1; y_off >>= 1;
1294 bx_off >>= 1; by_off >>= 1;
1295 width >>= 1; height >>= 1;
1296 block_w >>= 1; block_h >>= 1;
1297 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1298 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1299 &uvmv, x_off + bx_off, y_off + by_off,
1300 block_w, block_h, width, height, s->uvlinesize,
1301 s->put_pixels_tab[1 + (block_w == 4)]);
1304 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1305 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1306 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1308 /* Don't prefetch refs that haven't been used very often this frame. */
1309 if (s->ref_count[ref-1] > (mb_xy >> 5)) {
// Estimated luma source position ~4 MBs ahead of the current one.
1310 int x_off = mb_x << 4, y_off = mb_y << 4;
1311 int mx = (mb->mv.x>>2) + x_off + 8;
1312 int my = (mb->mv.y>>2) + y_off;
1313 uint8_t **src= s->framep[ref]->tf.f->data;
1314 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1315 /* For threading, a ff_thread_await_progress here might be useful, but
1316 * it actually slows down the decoder. Since a bad prefetch doesn't
1317 * generate bad decoder output, we don't run it here. */
1318 s->vdsp.prefetch(src[0]+off, s->linesize, 4);
// Chroma: half-resolution offset; src[2]-src[1] is the U->V plane stride.
1319 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1320 s->vdsp.prefetch(src[1]+off, src[2]-src[1], 2);
1325 * Apply motion vectors to prediction buffer, chapter 18.
/* Inter prediction for one macroblock: dispatch on the mv partitioning mode
 * and motion-compensate each partition from the selected reference frame.
 * NOTE(review): the `break;` statements between switch cases are elided
 * from this listing. */
1327 static av_always_inline
1328 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1329 VP8Macroblock *mb, int mb_x, int mb_y)
1331 int x_off = mb_x << 4, y_off = mb_y << 4;
1332 int width = 16*s->mb_width, height = 16*s->mb_height;
1333 ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1334 VP56mv *bmv = mb->bmv;
1336 switch (mb->partitioning) {
1337 case VP8_SPLITMVMODE_NONE:
// Whole 16x16 macroblock, single mv.
1338 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1339 0, 0, 16, 16, width, height, &mb->mv);
1341 case VP8_SPLITMVMODE_4x4: {
// Sixteen 4x4 luma blocks, each with its own mv from mb->bmv[].
1346 for (y = 0; y < 4; y++) {
1347 for (x = 0; x < 4; x++) {
1348 vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
1350 4*x + x_off, 4*y + y_off, 4, 4,
1351 width, height, s->linesize,
1352 s->put_pixels_tab[2]);
// Chroma: each 4x4 chroma block uses the average of the four co-located
// luma mvs, biased by the sign bit so the >>2 divide rounds toward zero.
1357 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1358 for (y = 0; y < 2; y++) {
1359 for (x = 0; x < 2; x++) {
1360 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1361 mb->bmv[ 2*y * 4 + 2*x+1].x +
1362 mb->bmv[(2*y+1) * 4 + 2*x ].x +
1363 mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1364 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1365 mb->bmv[ 2*y * 4 + 2*x+1].y +
1366 mb->bmv[(2*y+1) * 4 + 2*x ].y +
1367 mb->bmv[(2*y+1) * 4 + 2*x+1].y;
1368 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1369 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
// Profile 3: full-pel chroma mvs (fraction-bit masking elided here).
1370 if (s->profile == 3) {
1374 vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
1375 dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1376 4*x + x_off, 4*y + y_off, 4, 4,
1377 width, height, s->uvlinesize,
1378 s->put_pixels_tab[2]);
// Two 16x8 halves, one mv each.
1383 case VP8_SPLITMVMODE_16x8:
1384 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1385 0, 0, 16, 8, width, height, &bmv[0]);
1386 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1387 0, 8, 16, 8, width, height, &bmv[1]);
// Two 8x16 halves.
1389 case VP8_SPLITMVMODE_8x16:
1390 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1391 0, 0, 8, 16, width, height, &bmv[0]);
1392 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1393 8, 0, 8, 16, width, height, &bmv[1]);
// Four 8x8 quadrants.
1395 case VP8_SPLITMVMODE_8x8:
1396 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1397 0, 0, 8, 8, width, height, &bmv[0]);
1398 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1399 8, 0, 8, 8, width, height, &bmv[1]);
1400 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1401 0, 8, 8, 8, width, height, &bmv[2]);
1402 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1403 8, 8, 8, 8, width, height, &bmv[3]);
/* Add the inverse-transformed residual to the predicted macroblock.
 * non_zero_count_cache packs per-4x4-block coefficient counts; reading a
 * whole row as a 32-bit word lets us test four blocks at once:
 * 0 = skip, 1 = DC-only IDCT, >1 = full IDCT. */
1408 static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
1409 uint8_t *dst[3], VP8Macroblock *mb)
// I4x4 luma residuals are added during intra prediction, so skip Y here.
1413 if (mb->mode != MODE_I4x4) {
1414 uint8_t *y_dst = dst[0];
1415 for (y = 0; y < 4; y++) {
1416 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
// Some block in this row has AC coefficients: per-block dispatch.
1418 if (nnz4&~0x01010101) {
1419 for (x = 0; x < 4; x++) {
// Low byte of nnz4 is the count for block x (shifted down per iteration
// in lines elided from this listing).
1420 if ((uint8_t)nnz4 == 1)
1421 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
1422 else if((uint8_t)nnz4 > 1)
1423 s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
// Whole row is DC-only: use the fused 4-block DC-add.
1429 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1432 y_dst += 4*s->linesize;
// Chroma: same scheme per plane, 2x2 blocks of 4x4.
1436 for (ch = 0; ch < 2; ch++) {
1437 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
1439 uint8_t *ch_dst = dst[1+ch];
1440 if (nnz4&~0x01010101) {
1441 for (y = 0; y < 2; y++) {
1442 for (x = 0; x < 2; x++) {
1443 if ((uint8_t)nnz4 == 1)
1444 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1445 else if((uint8_t)nnz4 > 1)
1446 s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1449 goto chroma_idct_end;
1451 ch_dst += 4*s->uvlinesize;
1454 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
/* Compute the loop-filter strength for one macroblock and store it in *f:
 * base level from the segment (or frame), adjusted by reference-frame and
 * mode deltas, clamped to [0, 63], plus the derived interior limit. */
1461 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
1463 int interior_limit, filter_level;
1465 if (s->segmentation.enabled) {
1466 filter_level = s->segmentation.filter_level[mb->segment];
// Segment values are deltas unless absolute_vals is set.
1467 if (!s->segmentation.absolute_vals)
1468 filter_level += s->filter.level;
1470 filter_level = s->filter.level;
1472 if (s->lf_delta.enabled) {
1473 filter_level += s->lf_delta.ref[mb->ref_frame];
1474 filter_level += s->lf_delta.mode[mb->mode];
// Level is a 6-bit quantity per the bitstream spec.
1477 filter_level = av_clip_uintp2(filter_level, 6);
1479 interior_limit = filter_level;
1480 if (s->filter.sharpness) {
1481 interior_limit >>= (s->filter.sharpness + 3) >> 2;
1482 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1484 interior_limit = FFMAX(interior_limit, 1);
1486 f->filter_level = filter_level;
1487 f->inner_limit = interior_limit;
// Inner (sub-block) edges are only filtered when the MB carries residual
// data or is subdivided (I4x4 / split-mv).
1488 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
/* Normal (full) loop filter for one macroblock: horizontal then vertical,
 * macroblock edges with the stronger mbedge limit, inner 4-pixel edges with
 * bedge_lim when inner_filter is set.
 * NOTE(review): the guards that skip the left edge at mb_x==0, the top edge
 * at mb_y==0, the inner_filter tests, and the early return for a zero
 * filter_level are elided from this listing. */
1491 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
1493 int mbedge_lim, bedge_lim, hev_thresh;
1494 int filter_level = f->filter_level;
1495 int inner_limit = f->inner_limit;
1496 int inner_filter = f->inner_filter;
1497 int linesize = s->linesize;
1498 int uvlinesize = s->uvlinesize;
// High-edge-variance threshold by filter level; row 0 = keyframes,
// row 1 = interframes.
1499 static const uint8_t hev_thresh_lut[2][64] = {
1500 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1501 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1502 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1504 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1505 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1506 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1513 bedge_lim = 2*filter_level + inner_limit;
1514 mbedge_lim = bedge_lim + 4;
1516 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
// Horizontal filtering (vertical edges): left MB edge first...
1519 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
1520 mbedge_lim, inner_limit, hev_thresh);
1521 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
1522 mbedge_lim, inner_limit, hev_thresh);
// ...then the three inner luma columns and the inner chroma column.
1526 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
1527 inner_limit, hev_thresh);
1528 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
1529 inner_limit, hev_thresh);
1530 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
1531 inner_limit, hev_thresh);
1532 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
1533 uvlinesize, bedge_lim,
1534 inner_limit, hev_thresh);
// Vertical filtering (horizontal edges): top MB edge...
1538 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
1539 mbedge_lim, inner_limit, hev_thresh);
1540 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
1541 mbedge_lim, inner_limit, hev_thresh);
// ...then the three inner luma rows and the inner chroma row.
1545 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
1546 linesize, bedge_lim,
1547 inner_limit, hev_thresh);
1548 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
1549 linesize, bedge_lim,
1550 inner_limit, hev_thresh);
1551 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
1552 linesize, bedge_lim,
1553 inner_limit, hev_thresh);
1554 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
1555 dst[2] + 4 * uvlinesize,
1556 uvlinesize, bedge_lim,
1557 inner_limit, hev_thresh);
/* Simple loop filter: luma only, no high-edge-variance logic.
 * NOTE(review): the early return for filter_level == 0 and the mb_x/mb_y/
 * inner_filter guards are elided from this listing. */
1561 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
1563 int mbedge_lim, bedge_lim;
1564 int filter_level = f->filter_level;
1565 int inner_limit = f->inner_limit;
1566 int inner_filter = f->inner_filter;
1567 int linesize = s->linesize;
1572 bedge_lim = 2*filter_level + inner_limit;
1573 mbedge_lim = bedge_lim + 4;
// Vertical edges: left MB edge, then the three inner columns.
1576 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
1578 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1579 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1580 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
// Horizontal edges: top MB edge, then the three inner rows.
1584 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
1586 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1587 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1588 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
1592 #define MARGIN (16 << 2)
/* Decode the mode/mv info for every macroblock up front (used with
 * mb_layout == 1, i.e. frame threading), so slice threads only have to
 * decode coefficients later. mv_min/mv_max clamp mvs to the frame plus a
 * MARGIN of allowed overshoot. */
1593 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
1594 VP8Frame *prev_frame)
1596 VP8Context *s = avctx->priv_data;
1599 s->mv_min.y = -MARGIN;
1600 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1601 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
// Row base in the (mb_width+1)-wide layout; +1 skips the left-edge MB.
1602 VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1603 int mb_xy = mb_y*s->mb_width;
// Reset left intra prediction context at the start of each row.
1605 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1607 s->mv_min.x = -MARGIN;
1608 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1609 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
// Seed the "top" pred modes for row 0 (the row above the frame).
1611 AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
// Pass the previous frame's segmentation map so unsignalled segment ids
// carry over between frames.
1612 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
1613 prev_frame && prev_frame->seg_map ?
1614 prev_frame->seg_map->data + mb_xy : NULL, 1);
/* Sliced-threading synchronization helpers. Decode positions are packed as
 * (mb_y << 16) | mb_x.
 * check_thread_pos(): block (cond-wait on otd's lock) until thread `otd`
 * has progressed past macroblock (mb_x_check, mb_y_check).
 * update_pos(): publish td's new position and broadcast to any thread
 * currently waiting on it (only under sliced threading with >1 job).
 * The second pair of definitions is the no-op fallback when thread support
 * is unavailable; the #if/#else wrappers appear to be elided from this
 * listing. Comments cannot be placed inside the backslash-continued macro
 * bodies, hence this single header note. */
1624 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
1626 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);\
1627 if (otd->thread_mb_pos < tmp) {\
1628 pthread_mutex_lock(&otd->lock);\
1629 td->wait_mb_pos = tmp;\
1631 if (otd->thread_mb_pos >= tmp)\
1633 pthread_cond_wait(&otd->cond, &otd->lock);\
1635 td->wait_mb_pos = INT_MAX;\
1636 pthread_mutex_unlock(&otd->lock);\
1640 #define update_pos(td, mb_y, mb_x)\
1642 int pos = (mb_y << 16) | (mb_x & 0xFFFF);\
1643 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
1644 int is_null = (next_td == NULL) || (prev_td == NULL);\
1645 int pos_check = (is_null) ? 1 :\
1646 (next_td != td && pos >= next_td->wait_mb_pos) ||\
1647 (prev_td != td && pos >= prev_td->wait_mb_pos);\
1648 td->thread_mb_pos = pos;\
1649 if (sliced_threading && pos_check) {\
1650 pthread_mutex_lock(&td->lock);\
1651 pthread_cond_broadcast(&td->cond);\
1652 pthread_mutex_unlock(&td->lock);\
1656 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
1657 #define update_pos(td, mb_y, mb_x)
/* Decode one macroblock row (mode/coeffs/prediction/IDCT), without loop
 * filtering. The row index comes packed in td->thread_mb_pos. Under sliced
 * threading, waits on the thread decoding the row above before touching
 * macroblocks that depend on it. */
1660 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
1661 int jobnr, int threadnr)
1663 VP8Context *s = avctx->priv_data;
1664 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
1665 int mb_y = td->thread_mb_pos>>16;
1666 int i, y, mb_x, mb_xy = mb_y*s->mb_width;
1667 int num_jobs = s->num_jobs;
1668 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
// Coefficient partitions cycle across rows; num_coeff_partitions is a
// power of two, so the & works as a modulus.
1669 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
// Per-plane dst pointers for the top-left of this MB row.
1672 curframe->tf.f->data[0] + 16*mb_y*s->linesize,
1673 curframe->tf.f->data[1] + 8*mb_y*s->uvlinesize,
1674 curframe->tf.f->data[2] + 8*mb_y*s->uvlinesize
// Neighbouring row owners for cross-thread synchronization.
1676 if (mb_y == 0) prev_td = td;
1677 else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1678 if (mb_y == s->mb_height-1) next_td = td;
1679 else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
// mb_layout 1: all MBs pre-decoded in one array; otherwise a 2-row
// ring buffer is used and left-edge context is reset here.
1680 if (s->mb_layout == 1)
1681 mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1683 mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1684 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
1685 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1688 memset(td->left_nnz, 0, sizeof(td->left_nnz));
1689 // left edge of 129 for intra prediction
1690 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1691 for (i = 0; i < 3; i++)
1692 for (y = 0; y < 16>>!!i; y++)
1693 dst[i][y*curframe->tf.f->linesize[i]-1] = 129;
// Top-left corner pixels for row 0's intra prediction.
1695 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
1699 s->mv_min.x = -MARGIN;
1700 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1702 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1703 // Wait for previous thread to read mb_x+2, and reach mb_y-1.
1704 if (prev_td != td) {
1705 if (threadnr != 0) {
1706 check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
// Thread 0 also waits for the filter stage, hence the larger offset.
1708 check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
1712 s->vdsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1713 s->vdsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
// With mb_layout 1 modes were decoded up front; otherwise decode here.
1716 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
1717 prev_frame && prev_frame->seg_map ?
1718 prev_frame->seg_map->data + mb_xy : NULL, 0);
1720 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
1723 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);
1725 if (mb->mode <= MODE_I4x4)
1726 intra_predict(s, td, dst, mb, mb_x, mb_y);
1728 inter_predict(s, td, dst, mb, mb_x, mb_y);
1730 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
1733 idct_mb(s, td, dst, mb);
// Skipped MB with no coefficients: clear the nnz context instead.
1735 AV_ZERO64(td->left_nnz);
1736 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
1738 // Reset DC block predictors if they would exist if the mb had coefficients
1739 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
1740 td->left_nnz[8] = 0;
1741 s->top_nnz[mb_x][8] = 0;
1745 if (s->deblock_filter)
1746 filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);
// The last thread backs up the bottom border now, since no separate
// filter pass will run on it before the next row needs the pixels.
1748 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
1749 if (s->filter.simple)
1750 backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1752 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1755 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
// Publish progress; the +3 offset distinguishes the decode pass from
// the filter pass in the packed position.
1763 if (mb_x == s->mb_width+1) {
1764 update_pos(td, mb_y, s->mb_width+3);
1766 update_pos(td, mb_y, mb_x);
/* Loop-filter one macroblock row using the per-MB strengths computed during
 * the decode pass. Waits on neighbouring row threads so filtering never
 * touches pixels another thread is still writing. */
1771 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
1772 int jobnr, int threadnr)
1774 VP8Context *s = avctx->priv_data;
1775 VP8ThreadData *td = &s->thread_data[threadnr];
1776 int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
1777 AVFrame *curframe = s->curframe->tf.f;
1779 VP8ThreadData *prev_td, *next_td;
1781 curframe->data[0] + 16*mb_y*s->linesize,
1782 curframe->data[1] + 8*mb_y*s->uvlinesize,
1783 curframe->data[2] + 8*mb_y*s->uvlinesize
1786 if (s->mb_layout == 1)
1787 mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1789 mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1791 if (mb_y == 0) prev_td = td;
1792 else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1793 if (mb_y == s->mb_height-1) next_td = td;
1794 else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
1796 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
1797 VP8FilterStrength *f = &td->filter_strength[mb_x];
// Wait for the row above to be fully filtered...
1798 if (prev_td != td) {
1799 check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
// ...and for the row below to be decoded past this column.
1802 if (next_td != &s->thread_data[0]) {
1803 check_thread_pos(td, next_td, mb_x+1, mb_y+1);
// Single-job case: back up the unfiltered bottom border here (multi-job
// case does this during the decode pass instead).
1806 if (num_jobs == 1) {
1807 if (s->filter.simple)
1808 backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1810 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1813 if (s->filter.simple)
1814 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
1816 filter_mb(s, dst, f, mb_x, mb_y);
// Filter progress is published offset by mb_width+3 to distinguish it
// from decode progress within the same packed position.
1821 update_pos(td, mb_y, (s->mb_width+3) + mb_x);
/* Per-job entry point for sliced decoding: each job handles every
 * num_jobs-th macroblock row (decode pass, then optional filter pass),
 * reporting frame-threading progress as rows complete. */
1825 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
1826 int jobnr, int threadnr)
1828 VP8Context *s = avctx->priv_data;
1829 VP8ThreadData *td = &s->thread_data[jobnr];
1830 VP8ThreadData *next_td = NULL, *prev_td = NULL;
1831 VP8Frame *curframe = s->curframe;
1832 int mb_y, num_jobs = s->num_jobs;
1833 td->thread_nr = threadnr;
1834 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
// NOTE(review): this check is redundant — the loop condition above
// already guarantees mb_y < s->mb_height.
1835 if (mb_y >= s->mb_height) break;
1836 td->thread_mb_pos = mb_y<<16;
1837 vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
1838 if (s->deblock_filter)
1839 vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
// Mark the whole row done so waiters on any column proceed.
1840 update_pos(td, mb_y, INT_MAX & 0xFFFF);
// Let frame-threading consumers read up to the completed row.
1845 if (avctx->active_thread_type == FF_THREAD_FRAME)
1846 ff_thread_report_progress(&curframe->tf, mb_y, 0);
/* Top-level per-packet decode: parse the frame header, manage the reference
 * frame pool (last/golden/altref), allocate the output frame, run the
 * sliced row decoder, and emit the frame unless it is invisible. */
1852 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
1855 VP8Context *s = avctx->priv_data;
1856 int ret, i, referenced, num_jobs;
1857 enum AVDiscard skip_thresh;
1858 VP8Frame *av_uninit(curframe), *prev_frame;
1860 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
1863 prev_frame = s->framep[VP56_FRAME_CURRENT];
// A frame is "referenced" if any later frame can predict from it.
1865 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
1866 || s->update_altref == VP56_FRAME_CURRENT;
1868 skip_thresh = !referenced ? AVDISCARD_NONREF :
1869 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
// Honour the user's frame-skipping policy without touching the refs.
1871 if (avctx->skip_frame >= skip_thresh) {
1873 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
1876 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
1878 // release no longer referenced frames
1879 for (i = 0; i < 5; i++)
1880 if (s->frames[i].tf.f->data[0] &&
1881 &s->frames[i] != prev_frame &&
1882 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1883 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1884 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1885 vp8_release_frame(s, &s->frames[i]);
1887 // find a free buffer
1888 for (i = 0; i < 5; i++)
1889 if (&s->frames[i] != prev_frame &&
1890 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1891 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1892 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1893 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
// Pool exhausted: 5 frames should always suffice (4 refs + current).
1897 av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
1900 if (curframe->tf.f->data[0])
1901 vp8_release_frame(s, curframe);
1903 // Given that arithmetic probabilities are updated every frame, it's quite likely
1904 // that the values we have on a random interframe are complete junk if we didn't
1905 // start decode on a keyframe. So just don't display anything rather than junk.
1906 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1907 !s->framep[VP56_FRAME_GOLDEN] ||
1908 !s->framep[VP56_FRAME_GOLDEN2])) {
1909 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1910 ret = AVERROR_INVALIDDATA;
1914 curframe->tf.f->key_frame = s->keyframe;
1915 curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1916 if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
1919 // check if golden and altref are swapped
1920 if (s->update_altref != VP56_FRAME_NONE) {
1921 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
1923 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
1925 if (s->update_golden != VP56_FRAME_NONE) {
1926 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
1928 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
1930 if (s->update_last) {
1931 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
1933 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
1935 s->next_framep[VP56_FRAME_CURRENT] = curframe;
// All setup visible to other frame threads is done past this point.
1937 ff_thread_finish_setup(avctx);
1939 s->linesize = curframe->tf.f->linesize[0];
1940 s->uvlinesize = curframe->tf.f->linesize[1];
// Lazily allocate per-thread scratch buffers for edge emulation.
// NOTE(review): the av_malloc result is not checked for NULL here.
1942 if (!s->thread_data[0].edge_emu_buffer)
1943 for (i = 0; i < MAX_THREADS; i++)
1944 s->thread_data[i].edge_emu_buffer = av_malloc(21*s->linesize);
1946 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1947 /* Zero macroblock structures for top/top-left prediction from outside the frame. */
1949 memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
1950 if (!s->mb_layout && s->keyframe)
1951 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
1953 // top edge of 127 for intra prediction
1954 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1955 s->top_border[0][15] = s->top_border[0][23] = 127;
1956 s->top_border[0][31] = 127;
1957 memset(s->top_border[1], 127, s->mb_width*sizeof(*s->top_border));
1959 memset(s->ref_count, 0, sizeof(s->ref_count));
1962 // Make sure the previous frame has read its segmentation map,
1963 // if we re-use the same map.
1964 if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
1965 ff_thread_await_progress(&prev_frame->tf, 1, 0);
// mb_layout 1 (frame threading): decode all modes/mvs before slicing.
1967 if (s->mb_layout == 1)
1968 vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
1970 if (avctx->active_thread_type == FF_THREAD_FRAME)
// One job per coefficient partition at most.
1973 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
1974 s->num_jobs = num_jobs;
1975 s->curframe = curframe;
1976 s->prev_frame = prev_frame;
1977 s->mv_min.y = -MARGIN;
1978 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1979 for (i = 0; i < MAX_THREADS; i++) {
1980 s->thread_data[i].thread_mb_pos = 0;
1981 s->thread_data[i].wait_mb_pos = INT_MAX;
1983 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);
1985 ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
// Rotate the reference-frame pointers for the next packet.
1986 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
1989 // if future frames don't use the updated probabilities,
1990 // reset them to the values we saved
1991 if (!s->update_probabilities)
1992 s->prob[0] = s->prob[1];
// Invisible (altref-only) frames are decoded but never output.
1994 if (!s->invisible) {
1995 if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
2002 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
/* Codec close: flush/free all decoder state, then free the frame pool. */
2006 static av_cold int vp8_decode_free(AVCodecContext *avctx)
2008 VP8Context *s = avctx->priv_data;
2011 vp8_decode_flush_impl(avctx, 1);
// av_frame_free() is NULL-safe, so this also handles a partial init.
2012 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2013 av_frame_free(&s->frames[i].tf.f);
/* Allocate the AVFrame shells for the decoder's frame pool.
 * Returns 0 on success, AVERROR(ENOMEM) on allocation failure (caller is
 * expected to clean up via vp8_decode_free). */
2018 static av_cold int vp8_init_frames(VP8Context *s)
2021 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2022 s->frames[i].tf.f = av_frame_alloc();
2023 if (!s->frames[i].tf.f)
2024 return AVERROR(ENOMEM);
/* Codec init: fixed YUV420P output, DSP/prediction function tables, and the
 * frame pool. On pool allocation failure, everything already set up is torn
 * down via vp8_decode_free before the error is returned. */
2030 static av_cold int vp8_decode_init(AVCodecContext *avctx)
2031 VP8Context *s = avctx->priv_data;
2035 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
// Needed for ff_thread_report/await_progress with frame threading.
2036 avctx->internal->allocate_progress = 1;
2038 ff_videodsp_init(&s->vdsp, 8);
2039 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2040 ff_vp8dsp_init(&s->vp8dsp);
2042 if ((ret = vp8_init_frames(s)) < 0) {
2043 vp8_decode_free(avctx);
/* Frame-threading worker init: each thread copy only needs its own frame
 * pool; the rest of the state is copied in update_thread_context(). */
2050 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2052 VP8Context *s = avctx->priv_data;
2057 if ((ret = vp8_init_frames(s)) < 0) {
2058 vp8_decode_free(avctx);
/* Translate a frame pointer from the source thread's context (s_src) into
 * the corresponding slot of this thread's frames[] array, preserving NULL.
 * Relies on `s` and `s_src` being in scope at the expansion site.
 * The parameter and the whole expansion are parenthesized so the macro is
 * safe inside larger expressions and with non-trivial arguments. */
#define REBASE(pic) \
    ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
2068 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
2070 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2073 if (s->macroblocks_base &&
2074 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2076 s->mb_width = s_src->mb_width;
2077 s->mb_height = s_src->mb_height;
2080 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2081 s->segmentation = s_src->segmentation;
2082 s->lf_delta = s_src->lf_delta;
2083 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2085 for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2086 if (s_src->frames[i].tf.f->data[0]) {
2087 int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
2093 s->framep[0] = REBASE(s_src->next_framep[0]);
2094 s->framep[1] = REBASE(s_src->next_framep[1]);
2095 s->framep[2] = REBASE(s_src->next_framep[2]);
2096 s->framep[3] = REBASE(s_src->next_framep[3]);
2101 static unsigned apply_padding(unsigned size) { return size + (size & 1); }
/* WebP entry point: strip the RIFF container (skipping VP8X metadata and
 * ALPH alpha chunks, rejecting lossless VP8L), locate the "VP8 " chunk, and
 * hand the raw VP8 payload to vp8_decode_frame(). */
2103 static int webp_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
2106 const uint8_t *buf = avpkt->data;
2107 int buf_size = avpkt->size;
// Local copy so we can repoint data/size at the embedded VP8 payload.
2108 AVPacket pkt = *avpkt;
2111 && AV_RL32(buf ) == AV_RL32("RIFF")
2112 && AV_RL32(buf+ 8) == AV_RL32("WEBP")) {
// RIFF sizes exclude the 8-byte chunk header; chunks are 2-byte padded.
2113 unsigned riff_size = apply_padding(AV_RL32(buf+4)) + 8;
2114 buf += 12; // Skip over main header
2116 if (buf_size < 8 || riff_size < 8) {
2117 av_log(avctx, AV_LOG_ERROR, "Incomplete header.\n");
2118 return AVERROR_INVALIDDATA;
2120 if (AV_RL32(buf) == AV_RL32("VP8L")) {
2121 av_log(avctx, AV_LOG_ERROR, "Unsupported WebP lossless format.\n");
2122 return AVERROR_PATCHWELCOME;
// Extended-format header: skip it (size sanity-checked against buf_size).
2124 if (AV_RL32(buf) == AV_RL32("VP8X") && AV_RL32(buf+4) < (unsigned)buf_size) {
2125 unsigned size = apply_padding(AV_RL32(buf+4) + 8);
// Alpha plane decoding is not implemented; warn and skip the chunk.
2130 && AV_RL32(buf) == AV_RL32("ALPH") && AV_RL32(buf+4) < (unsigned)buf_size) {
2131 unsigned size = apply_padding(AV_RL32(buf+4) + 8);
2134 av_log(avctx, AV_LOG_WARNING, "Skipping alpha plane\n");
// Found the lossy bitstream chunk; advance past its header (lines elided
// from this listing) and decode the remainder as raw VP8.
2136 if (buf_size >= 8 && AV_RL32(buf) == AV_RL32("VP8 ")) {
2142 pkt.size = buf_size;
2144 return vp8_decode_frame(avctx, data, data_size, &pkt);
/* Codec registration for raw VP8: supports direct rendering and both
 * frame- and slice-based multithreading. */
2147 AVCodec ff_vp8_decoder = {
2149 .type = AVMEDIA_TYPE_VIDEO,
2150 .id = AV_CODEC_ID_VP8,
2151 .priv_data_size = sizeof(VP8Context),
2152 .init = vp8_decode_init,
2153 .close = vp8_decode_free,
2154 .decode = vp8_decode_frame,
2155 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2156 .flush = vp8_decode_flush,
2157 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2158 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2159 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2162 AVCodec ff_webp_decoder = {
2164 .type = AVMEDIA_TYPE_VIDEO,
2165 .id = AV_CODEC_ID_WEBP,
2166 .priv_data_size = sizeof(VP8Context),
2167 .init = vp8_decode_init,
2168 .close = vp8_decode_free,
2169 .decode = webp_decode_frame,
2170 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2171 .flush = vp8_decode_flush,
2172 .long_name = NULL_IF_CONFIG_SMALL("WebP"),
2173 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2174 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),