2 * VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Jason Garrett-Glaser
7 * Copyright (C) 2012 Daniel Kang
9 * This file is part of FFmpeg.
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/imgutils.h"
31 #include "rectangle.h"
/* Free every dynamically allocated helper buffer owned by the context:
 * per-thread scratch buffers plus the macroblock / top-row arrays.
 * NOTE(review): this listing omits some original lines (braces/decls);
 * numbering gaps mark the elisions. */
38 static void free_buffers(VP8Context *s)
42 for (i = 0; i < MAX_THREADS; i++) {
43 av_freep(&s->thread_data[i].filter_strength);
44 av_freep(&s->thread_data[i].edge_emu_buffer);
46 av_freep(&s->thread_data);
47 av_freep(&s->macroblocks_base);
48 av_freep(&s->intra4x4_pred_mode_top);
49 av_freep(&s->top_nnz);
50 av_freep(&s->top_border);
52 s->macroblocks = NULL; // points into macroblocks_base; invalidate the alias
/* Acquire a frame buffer and attach a segmentation map to it, recycling a
 * queued map when available to avoid reallocation.
 * @return 0 on success, negative AVERROR on failure. */
55 static int vp8_alloc_frame(VP8Context *s, AVFrame *f)
58 if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0)
60 if (s->num_maps_to_be_freed && !s->maps_are_invalid) {
61 f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed]; // reuse a queued map
62 } else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) {
63 ff_thread_release_buffer(s->avctx, f); // don't leak the frame buffer on OOM
64 return AVERROR(ENOMEM);
/* Release a frame buffer; its segmentation map is either queued for delayed
 * freeing (other threads may still be reading it) or freed immediately. */
69 static void vp8_release_frame(VP8Context *s, AVFrame *f, int prefer_delayed_free, int can_direct_free)
71 if (f->ref_index[0]) {
72 if (prefer_delayed_free) {
73 /* Upon a size change, we want to free the maps but other threads may still
74 * be using them, so queue them. Upon a seek, all threads are inactive so
75 * we want to cache one to prevent re-allocation in the next decoding
76 * iteration, but the rest we can free directly. */
77 int max_queued_maps = can_direct_free ? 1 : FF_ARRAY_ELEMS(s->segmentation_maps);
78 if (s->num_maps_to_be_freed < max_queued_maps) {
79 s->segmentation_maps[s->num_maps_to_be_freed++] = f->ref_index[0];
80 } else if (can_direct_free) /* vp8_decode_flush(), but our queue is full */ {
81 av_free(f->ref_index[0]);
82 } /* else: MEMLEAK (should never happen, but better that than crash) */
83 f->ref_index[0] = NULL;
84 } else /* vp8_decode_free() */ {
85 av_free(f->ref_index[0]);
88 ff_thread_release_buffer(s->avctx, f);
/* Release all reference frames (and optionally auxiliary memory).
 * Frame-threading copies don't own the frames, so only the original
 * context (!is_copy) releases them. */
91 static void vp8_decode_flush_impl(AVCodecContext *avctx,
92 int prefer_delayed_free, int can_direct_free, int free_mem)
94 VP8Context *s = avctx->priv_data;
97 if (!avctx->internal->is_copy) {
98 for (i = 0; i < 5; i++)
99 if (s->frames[i].data[0])
100 vp8_release_frame(s, &s->frames[i], prefer_delayed_free, can_direct_free);
102 memset(s->framep, 0, sizeof(s->framep));
106 s->maps_are_invalid = 1; // queued segmentation maps no longer match current state
/* AVCodec.flush callback: drop all references, keep allocated memory. */
110 static void vp8_decode_flush(AVCodecContext *avctx)
112 vp8_decode_flush_impl(avctx, 1, 1, 0);
/* (Re)allocate all dimension-dependent buffers when the coded size changes,
 * choosing the macroblock layout based on slice- vs frame-threading.
 * @return 0 on success, negative AVERROR on failure. */
115 static int update_dimensions(VP8Context *s, int width, int height)
117 AVCodecContext *avctx = s->avctx;
120 if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
121 height != s->avctx->height) {
122 if (av_image_check_size(width, height, 0, s->avctx))
123 return AVERROR_INVALIDDATA;
125 vp8_decode_flush_impl(s->avctx, 1, 0, 1); // free old buffers; queue maps other threads may use
127 avcodec_set_dimensions(s->avctx, width, height);
130 s->mb_width = (s->avctx->coded_width +15) / 16;
131 s->mb_height = (s->avctx->coded_height+15) / 16;
133 s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
134 if (!s->mb_layout) { // Frame threading and one thread
135 s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
136 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
138 else // Sliced threading
139 s->macroblocks_base = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
140 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
141 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
142 s->thread_data = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));
144 for (i = 0; i < MAX_THREADS; i++) {
145 s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
147 pthread_mutex_init(&s->thread_data[i].lock, NULL);
148 pthread_cond_init(&s->thread_data[i].cond, NULL);
152 if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
153 (!s->intra4x4_pred_mode_top && !s->mb_layout))
154 return AVERROR(ENOMEM);
156 s->macroblocks = s->macroblocks_base + 1; // skip the guard element at index 0
/* Parse the segmentation header: per-segment quantizer and filter-level
 * deltas (sign-magnitude) plus segment-id tree probabilities. */
161 static void parse_segment_info(VP8Context *s)
163 VP56RangeCoder *c = &s->c;
166 s->segmentation.update_map = vp8_rac_get(c);
168 if (vp8_rac_get(c)) { // update segment feature data
169 s->segmentation.absolute_vals = vp8_rac_get(c);
171 for (i = 0; i < 4; i++)
172 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
174 for (i = 0; i < 4; i++)
175 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
177 if (s->segmentation.update_map)
178 for (i = 0; i < 3; i++)
179 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255; // 255 = default prob
/* Read loop-filter delta updates: per-reference-frame and per-mode
 * adjustments, each coded as magnitude followed by a sign bit. */
182 static void update_lf_deltas(VP8Context *s)
184 VP56RangeCoder *c = &s->c;
187 for (i = 0; i < 4; i++) {
188 if (vp8_rac_get(c)) {
189 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
192 s->lf_delta.ref[i] = -s->lf_delta.ref[i]; // sign bit follows the magnitude
196 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
197 if (vp8_rac_get(c)) {
198 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
201 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
/* Parse the coefficient-partition layout: a 2-bit log2 partition count,
 * then the 24-bit LE size of each partition except the last, which takes
 * the remaining buffer. Returns nonzero on inconsistent sizes. */
206 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
208 const uint8_t *sizes = buf;
211 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
213 buf += 3*(s->num_coeff_partitions-1);
214 buf_size -= 3*(s->num_coeff_partitions-1);
218 for (i = 0; i < s->num_coeff_partitions-1; i++) {
219 int size = AV_RL24(sizes + 3*i);
220 if (buf_size - size < 0)
223 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
227 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size); // last partition gets the rest
/* Derive per-segment dequantization factors from the base quantizer index
 * plus per-plane delta indices, via the DC/AC lookup tables. */
232 static void get_quants(VP8Context *s)
234 VP56RangeCoder *c = &s->c;
237 int yac_qi = vp8_rac_get_uint(c, 7);
238 int ydc_delta = vp8_rac_get_sint(c, 4);
239 int y2dc_delta = vp8_rac_get_sint(c, 4);
240 int y2ac_delta = vp8_rac_get_sint(c, 4);
241 int uvdc_delta = vp8_rac_get_sint(c, 4);
242 int uvac_delta = vp8_rac_get_sint(c, 4);
244 for (i = 0; i < 4; i++) {
245 if (s->segmentation.enabled) {
246 base_qi = s->segmentation.base_quant[i];
247 if (!s->segmentation.absolute_vals)
252 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
253 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)];
254 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
255 /* 101581>>16 is equivalent to 155/100 */
256 s->qmat[i].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
257 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
258 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
260 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8); // clamp to minimum of 8
261 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132); // clamp to maximum of 132
266 * Determine which buffers golden and altref should be updated with after this frame.
267 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
269 * Intra frames update all 3 references
270 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
271 * If the update (golden|altref) flag is set, it's updated with the current frame
272 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
273 * If the flag is not set, the number read means:
275 * 1: VP56_FRAME_PREVIOUS
276 * 2: update golden with altref, or update altref with golden
278 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
280 VP56RangeCoder *c = &s->c;
283 return VP56_FRAME_CURRENT;
285 switch (vp8_rac_get_uint(c, 2)) {
287 return VP56_FRAME_PREVIOUS;
289 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN; // cross-copy golden<->altref
291 return VP56_FRAME_NONE;
/* Decide which frames the golden and altref reference slots will be
 * updated from after decoding this frame. */
294 static void update_refs(VP8Context *s)
296 VP56RangeCoder *c = &s->c;
298 int update_golden = vp8_rac_get(c);
299 int update_altref = vp8_rac_get(c);
301 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
302 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
/* Parse the uncompressed frame header plus the first (mode/MV) compressed
 * partition header: frame type, dimensions, segmentation, loop filter,
 * partition layout, quantizers, reference updates and probability updates.
 * @return 0 on success, negative AVERROR on invalid input. */
305 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
307 VP56RangeCoder *c = &s->c;
308 int header_size, hscale, vscale, i, j, k, l, m, ret;
309 int width = s->avctx->width;
310 int height = s->avctx->height;
312 s->keyframe = !(buf[0] & 1);
313 s->profile = (buf[0]>>1) & 7;
314 s->invisible = !(buf[0] & 0x10);
315 header_size = AV_RL24(buf) >> 5;
320 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
323 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
324 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
325 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
327 if (header_size > buf_size - 7*s->keyframe) {
328 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
329 return AVERROR_INVALIDDATA;
333 if (AV_RL24(buf) != 0x2a019d) { // keyframe start code
334 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
335 return AVERROR_INVALIDDATA;
337 width = AV_RL16(buf+3) & 0x3fff;
338 height = AV_RL16(buf+5) & 0x3fff;
339 hscale = buf[4] >> 6;
340 vscale = buf[6] >> 6;
344 if (hscale || vscale)
345 av_log_missing_feature(s->avctx, "Upscaling", 1);
347 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
348 for (i = 0; i < 4; i++)
349 for (j = 0; j < 16; j++)
350 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
351 sizeof(s->prob->token[i][j]));
352 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
353 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
354 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
355 memset(&s->segmentation, 0, sizeof(s->segmentation));
356 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
359 ff_vp56_init_range_decoder(c, buf, header_size);
361 buf_size -= header_size;
365 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
366 vp8_rac_get(c); // whether we can skip clamping in dsp functions
369 if ((s->segmentation.enabled = vp8_rac_get(c)))
370 parse_segment_info(s);
372 s->segmentation.update_map = 0; // FIXME: move this to some init function?
374 s->filter.simple = vp8_rac_get(c);
375 s->filter.level = vp8_rac_get_uint(c, 6);
376 s->filter.sharpness = vp8_rac_get_uint(c, 3);
378 if ((s->lf_delta.enabled = vp8_rac_get(c)))
382 if (setup_partitions(s, buf, buf_size)) {
383 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
384 return AVERROR_INVALIDDATA;
387 if (!s->macroblocks_base || /* first frame */
388 width != s->avctx->width || height != s->avctx->height || (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) {
389 if ((ret = update_dimensions(s, width, height)) < 0)
397 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
398 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
401 // if we aren't saving this frame's probabilities for future frames,
402 // make a copy of the current probabilities
403 if (!(s->update_probabilities = vp8_rac_get(c)))
404 s->prob[1] = s->prob[0];
406 s->update_last = s->keyframe || vp8_rac_get(c);
408 for (i = 0; i < 4; i++)
409 for (j = 0; j < 8; j++)
410 for (k = 0; k < 3; k++)
411 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
412 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
413 int prob = vp8_rac_get_uint(c, 8);
414 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
415 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
418 if ((s->mbskip_enabled = vp8_rac_get(c)))
419 s->prob->mbskip = vp8_rac_get_uint(c, 8);
422 s->prob->intra = vp8_rac_get_uint(c, 8);
423 s->prob->last = vp8_rac_get_uint(c, 8);
424 s->prob->golden = vp8_rac_get_uint(c, 8);
427 for (i = 0; i < 4; i++)
428 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
430 for (i = 0; i < 3; i++)
431 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
433 // 17.2 MV probability update
434 for (i = 0; i < 2; i++)
435 for (j = 0; j < 19; j++)
436 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
437 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
/* Clamp a motion vector into the valid range (s->mv_min / s->mv_max). */
443 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
445 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
446 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
450 * Motion vector coding, 17.1.
452 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
456 if (vp56_rac_get_prob_branchy(c, p[0])) { // large magnitude: read raw bits
459 for (i = 0; i < 3; i++)
460 x += vp56_rac_get_prob(c, p[9 + i]) << i; // low 3 bits, LSB first
461 for (i = 9; i > 3; i--)
462 x += vp56_rac_get_prob(c, p[9 + i]) << i; // high bits, MSB first
463 if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
467 const uint8_t *ps = p+2; // small magnitude: walk the short tree
468 bit = vp56_rac_get_prob(c, *ps);
471 bit = vp56_rac_get_prob(c, *ps);
474 x += vp56_rac_get_prob(c, *ps);
477 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x; // p[1] = sign probability
/* Select the sub-MV probability set from the left/above neighbour MVs
 * (the chosen table depends on which neighbours match / are zero). */
480 static av_always_inline
481 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
484 return vp8_submv_prob[4-!!left];
486 return vp8_submv_prob[2];
487 return vp8_submv_prob[1-!!left];
491 * Split motion vector prediction, 16.4.
492 * @returns the number of motion vectors parsed (2, 4 or 16)
494 static av_always_inline
495 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
499 VP8Macroblock *top_mb;
500 VP8Macroblock *left_mb = &mb[-1];
501 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
503 *mbsplits_cur, *firstidx;
505 VP56mv *left_mv = left_mb->bmv;
506 VP56mv *cur_mv = mb->bmv;
508 if (!layout) // layout is inlined, s->mb_layout is not
511 top_mb = &mb[-s->mb_width-1];
512 mbsplits_top = vp8_mbsplits[top_mb->partitioning];
513 top_mv = top_mb->bmv;
515 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) { // read the partition type tree
516 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
517 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
519 part_idx = VP8_SPLITMVMODE_8x8;
522 part_idx = VP8_SPLITMVMODE_4x4;
525 num = vp8_mbsplit_count[part_idx];
526 mbsplits_cur = vp8_mbsplits[part_idx],
527 firstidx = vp8_mbfirstidx[part_idx];
528 mb->partitioning = part_idx;
530 for (n = 0; n < num; n++) {
532 uint32_t left, above;
533 const uint8_t *submv_prob;
536 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]); // neighbour MV from left MB
538 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]); // or from this MB
540 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]); // neighbour MV from top MB
542 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]); // or from this MB
544 submv_prob = get_submv_prob(left, above);
546 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
547 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
548 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) { // NEW4X4: explicit delta
549 mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
550 mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
552 AV_ZERO32(&mb->bmv[n]);
555 AV_WN32A(&mb->bmv[n], above);
558 AV_WN32A(&mb->bmv[n], left);
/* Decode this macroblock's motion vector(s): gather the top/left/top-left
 * neighbour MVs into near_mv[] with occurrence counts, then read the MV
 * mode (zero/nearest/near/new/split) and the vector itself. */
565 static av_always_inline
566 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
568 VP8Macroblock *mb_edge[3] = { 0 /* top */,
571 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
572 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
574 int cur_sign_bias = s->sign_bias[mb->ref_frame];
575 int8_t *sign_bias = s->sign_bias;
577 uint8_t cnt[4] = { 0 };
578 VP56RangeCoder *c = &s->c;
580 if (!layout) { // layout is inlined (s->mb_layout is not)
585 mb_edge[0] = mb - s->mb_width-1;
586 mb_edge[2] = mb - s->mb_width-2;
589 AV_ZERO32(&near_mv[0]);
590 AV_ZERO32(&near_mv[1]);
591 AV_ZERO32(&near_mv[2]);
593 /* Process MB on top, left and top-left */
594 #define MV_EDGE_CHECK(n)\
596 VP8Macroblock *edge = mb_edge[n];\
597 int edge_ref = edge->ref_frame;\
598 if (edge_ref != VP56_FRAME_CURRENT) {\
599 uint32_t mv = AV_RN32A(&edge->mv);\
601 if (cur_sign_bias != sign_bias[edge_ref]) {\
602 /* SWAR negate of the values in mv. */\
604 mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
606 if (!n || mv != AV_RN32A(&near_mv[idx]))\
607 AV_WN32A(&near_mv[++idx], mv);\
608 cnt[idx] += 1 + (n != 2);\
610 cnt[CNT_ZERO] += 1 + (n != 2);\
618 mb->partitioning = VP8_SPLITMVMODE_NONE;
619 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
620 mb->mode = VP8_MVMODE_MV;
622 /* If we have three distinct MVs, merge first and last if they're the same */
623 if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
624 cnt[CNT_NEAREST] += 1;
626 /* Swap near and nearest if necessary */
627 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
628 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
629 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
632 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
633 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
635 /* Choose the best mv out of 0,0 and the nearest mv */
636 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
637 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
638 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
639 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
641 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
642 mb->mode = VP8_MVMODE_SPLIT;
643 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1]; // last sub-MV becomes the MB MV
645 mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
646 mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
650 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
654 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
658 mb->mode = VP8_MVMODE_ZERO;
/* Read the 16 intra 4x4 prediction modes of an I4x4 macroblock. Keyframes
 * use context from the left column and top row; inter frames use a single
 * context-free probability table. */
664 static av_always_inline
665 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
666 int mb_x, int keyframe, int layout)
668 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
671 VP8Macroblock *mb_top = mb - s->mb_width - 1;
672 memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
677 uint8_t* const left = s->intra4x4_pred_mode_left;
679 top = mb->intra4x4_pred_mode_top;
681 top = s->intra4x4_pred_mode_top + 4 * mb_x;
682 for (y = 0; y < 4; y++) {
683 for (x = 0; x < 4; x++) {
685 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]]; // context from top/left neighbour modes
686 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
687 left[y] = top[x] = *intra4x4;
693 for (i = 0; i < 16; i++)
694 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
/* Decode per-macroblock mode info: segment id, skip flag, intra/inter
 * decision, reference frame, prediction modes and (for inter) MVs. */
698 static av_always_inline
699 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
700 uint8_t *segment, uint8_t *ref, int layout)
702 VP56RangeCoder *c = &s->c;
704 if (s->segmentation.update_map) {
705 int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
706 *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit; // 2-level tree -> 0..3
707 } else if (s->segmentation.enabled)
708 *segment = ref ? *ref : *segment; // keep the previous frame's segment id
709 mb->segment = *segment;
711 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
714 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
716 if (mb->mode == MODE_I4x4) {
717 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
719 const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u; // replicate mode to 4 bytes
720 if (s->mb_layout == 1)
721 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
723 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
724 AV_WN32A( s->intra4x4_pred_mode_left, modes);
727 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
728 mb->ref_frame = VP56_FRAME_CURRENT;
729 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
731 if (vp56_rac_get_prob_branchy(c, s->prob->last))
732 mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
733 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
735 mb->ref_frame = VP56_FRAME_PREVIOUS;
736 s->ref_count[mb->ref_frame-1]++;
738 // motion vectors, 16.3
739 decode_mvs(s, mb, mb_x, mb_y, layout);
742 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
744 if (mb->mode == MODE_I4x4)
745 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
747 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
748 mb->ref_frame = VP56_FRAME_CURRENT;
749 mb->partitioning = VP8_SPLITMVMODE_NONE;
750 AV_ZERO32(&mb->bmv[0]);
754 #ifndef decode_block_coeffs_internal
756 * @param r arithmetic bitstream reader context
757 * @param block destination for block coefficients
758 * @param probs probabilities to use when reading trees from the bitstream
759 * @param i initial coeff index, 0 unless a separate DC block is coded
760 * @param qmul array holding the dc/ac dequant factor at position 0/1
761 * @return 0 if no coeffs were decoded
762 * otherwise, the index of the last coeff decoded plus one
764 static int decode_block_coeffs_internal(VP56RangeCoder *r, DCTELEM block[16],
765 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
766 int i, uint8_t *token_prob, int16_t qmul[2])
768 VP56RangeCoder c = *r; // work on a local copy of the range coder
772 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
776 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
778 break; // invalid input; blocks should end with EOB
779 token_prob = probs[i][0];
783 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
785 token_prob = probs[i+1][1];
787 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
788 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
790 coeff += vp56_rac_get_prob(&c, token_prob[5]);
794 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
795 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
796 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
799 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
800 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
802 } else { // DCT_CAT3 and up
803 int a = vp56_rac_get_prob(&c, token_prob[8]);
804 int b = vp56_rac_get_prob(&c, token_prob[9+a]);
805 int cat = (a<<1) + b;
806 coeff = 3 + (8<<cat); // category base value
807 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
810 token_prob = probs[i+1][2];
812 block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i]; // sign, then dequantize (DC uses qmul[0])
821 * @param c arithmetic bitstream reader context
822 * @param block destination for block coefficients
823 * @param probs probabilities to use when reading trees from the bitstream
824 * @param i initial coeff index, 0 unless a separate DC block is coded
825 * @param zero_nhood the initial prediction context for number of surrounding
826 * all-zero blocks (only left/top, so 0-2)
827 * @param qmul array holding the dc/ac dequant factor at position 0/1
828 * @return 0 if no coeffs were decoded
829 * otherwise, the index of the last coeff decoded plus one
831 static av_always_inline
832 int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
833 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
834 int i, int zero_nhood, int16_t qmul[2])
836 uint8_t *token_prob = probs[i][zero_nhood];
837 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB: fast path, avoid the internal call
839 return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
/* Decode all DCT coefficients of one macroblock: optional Y2 (luma DC)
 * block with WHT, 16 luma 4x4 blocks, then 8 chroma 4x4 blocks, updating
 * the top/left non-zero contexts as it goes. */
842 static av_always_inline
843 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
844 uint8_t t_nnz[9], uint8_t l_nnz[9])
846 int i, x, y, luma_start = 0, luma_ctx = 3;
847 int nnz_pred, nnz, nnz_total = 0;
848 int segment = mb->segment;
851 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
852 nnz_pred = t_nnz[8] + l_nnz[8];
854 // decode DC values and do hadamard
855 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
856 s->qmat[segment].luma_dc_qmul);
857 l_nnz[8] = t_nnz[8] = !!nnz;
862 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc); // DC-only shortcut
864 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
871 for (y = 0; y < 4; y++)
872 for (x = 0; x < 4; x++) {
873 nnz_pred = l_nnz[y] + t_nnz[x];
874 nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
875 nnz_pred, s->qmat[segment].luma_qmul);
876 // nnz+block_dc may be one more than the actual last index, but we don't care
877 td->non_zero_count_cache[y][x] = nnz + block_dc;
878 t_nnz[x] = l_nnz[y] = !!nnz;
883 // TODO: what to do about dimensions? 2nd dim for luma is x,
884 // but for chroma it's (y<<1)|x
885 for (i = 4; i < 6; i++)
886 for (y = 0; y < 2; y++)
887 for (x = 0; x < 2; x++) {
888 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
889 nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
890 nnz_pred, s->qmat[segment].chroma_qmul);
891 td->non_zero_count_cache[i][(y<<1)+x] = nnz;
892 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
896 // if there were no coded coeffs despite the macroblock not being marked skip,
897 // we MUST not do the inner loop filter and should not do IDCT
898 // Since skip isn't used for bitstream prediction, just manually set it.
/* Save the bottom edge of a reconstructed macroblock into top_border so it
 * can serve as the top prediction edge for the row below. Chroma is only
 * needed for the non-simple filter path. */
903 static av_always_inline
904 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
905 int linesize, int uvlinesize, int simple)
907 AV_COPY128(top_border, src_y + 15*linesize); // last luma row (16 px)
909 AV_COPY64(top_border+16, src_cb + 7*uvlinesize); // last chroma rows (8 px each)
910 AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
/* Exchange (or copy, when xchg==0) the saved top border with the pixel
 * rows above the current macroblock, so intra prediction sees unfiltered
 * neighbours while the deblocked picture is preserved. */
914 static av_always_inline
915 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
916 int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
917 int simple, int xchg)
919 uint8_t *top_border_m1 = top_border-32; // for TL prediction
921 src_cb -= uvlinesize;
922 src_cr -= uvlinesize;
924 #define XCHG(a,b,xchg) do { \
925 if (xchg) AV_SWAP64(b,a); \
926 else AV_COPY64(b,a); \
929 XCHG(top_border_m1+8, src_y-8, xchg);
930 XCHG(top_border, src_y, xchg);
931 XCHG(top_border+8, src_y+8, 1);
932 if (mb_x < mb_width-1)
933 XCHG(top_border+32, src_y+16, 1); // top-right edge from the next MB's border
935 // only copy chroma for normal loop filter
936 // or to initialize the top row to 127
937 if (!simple || !mb_y) {
938 XCHG(top_border_m1+16, src_cb-8, xchg);
939 XCHG(top_border_m1+24, src_cr-8, xchg);
940 XCHG(top_border+16, src_cb, 1);
941 XCHG(top_border+24, src_cr, 1);
/* Replace DC 16x16/8x8 prediction with an edge-aware variant when top
 * and/or left neighbours are unavailable. */
945 static av_always_inline
946 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
949 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
951 return mb_y ? mode : LEFT_DC_PRED8x8;
/* Replace TM 16x16/8x8 prediction with a fallback when top/left
 * neighbours are unavailable. */
955 static av_always_inline
956 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
959 return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
961 return mb_y ? mode : HOR_PRED8x8;
/* Fix up a 16x16/8x8 intra mode at picture edges (non-emu-edge path);
 * only DC needs special handling here. */
965 static av_always_inline
966 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
968 if (mode == DC_PRED8x8) {
969 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
/* Fix up a 16x16/8x8 intra mode at picture edges for the
 * CODEC_FLAG_EMU_EDGE path (no allocated edge pixels to rely on). */
975 static av_always_inline
976 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
980 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
982 return !mb_y ? DC_127_PRED8x8 : mode;
984 return !mb_x ? DC_129_PRED8x8 : mode;
985 case PLANE_PRED8x8 /*TM*/:
986 return check_tm_pred8x8_mode(mode, mb_x, mb_y);
/* Replace TM 4x4 prediction with a fallback when top/left neighbours
 * are unavailable. */
991 static av_always_inline
992 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
995 return mb_y ? VERT_VP8_PRED : DC_129_PRED;
997 return mb_y ? mode : HOR_VP8_PRED;
/* Fix up a 4x4 intra mode at picture edges (emu-edge path); some modes
 * additionally request prediction into a copy buffer (*copy_buf). */
1001 static av_always_inline
1002 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
1006 if (!mb_x && mb_y) {
1011 case DIAG_DOWN_LEFT_PRED:
1012 case VERT_LEFT_PRED:
1013 return !mb_y ? DC_127_PRED : mode;
1021 return !mb_x ? DC_129_PRED : mode;
1023 return check_tm_pred4x4_mode(mode, mb_x, mb_y);
1024 case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
1025 case DIAG_DOWN_RIGHT_PRED:
1026 case VERT_RIGHT_PRED:
/* Perform intra prediction for a whole macroblock (16x16/I4x4 luma plus
 * 8x8 chroma), handling picture-edge mode fixups and, in the emu-edge
 * path, prediction through a small aligned copy buffer.
 * Fix: original line 1090 contained the mojibake "©" where the address-of
 * expression "&copy" belongs (HTML-entity corruption); restored. */
1035 static av_always_inline
1036 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1037 VP8Macroblock *mb, int mb_x, int mb_y)
1039 AVCodecContext *avctx = s->avctx;
1040 int x, y, mode, nnz;
1043 // for the first row, we need to run xchg_mb_border to init the top edge to 127
1044 // otherwise, skip it if we aren't going to deblock
1045 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1046 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1047 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1048 s->filter.simple, 1);
1050 if (mb->mode < MODE_I4x4) {
1051 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
1052 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
1054 mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
1056 s->hpc.pred16x16[mode](dst[0], s->linesize);
1058 uint8_t *ptr = dst[0];
1059 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1060 uint8_t tr_top[4] = { 127, 127, 127, 127 };
1062 // all blocks on the right edge of the macroblock use bottom edge
1063 // the top macroblock for their topright edge
1064 uint8_t *tr_right = ptr - s->linesize + 16;
1066 // if we're on the right edge of the frame, said edge is extended
1067 // from the top macroblock
1068 if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
1069 mb_x == s->mb_width-1) {
1070 tr = tr_right[-1]*0x01010101u;
1071 tr_right = (uint8_t *)&tr;
1075 AV_ZERO128(td->non_zero_count_cache);
1077 for (y = 0; y < 4; y++) {
1078 uint8_t *topright = ptr + 4 - s->linesize;
1079 for (x = 0; x < 4; x++) {
1080 int copy = 0, linesize = s->linesize;
1081 uint8_t *dst = ptr+4*x;
1082 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
1084 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
1087 topright = tr_right;
1089 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
1090 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
1092 dst = copy_dst + 12;
1096 AV_WN32A(copy_dst+4, 127U * 0x01010101U);
1098 AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
1102 copy_dst[3] = ptr[4*x-s->linesize-1];
1109 copy_dst[35] = 129U;
1111 copy_dst[11] = ptr[4*x -1];
1112 copy_dst[19] = ptr[4*x+s->linesize -1];
1113 copy_dst[27] = ptr[4*x+s->linesize*2-1];
1114 copy_dst[35] = ptr[4*x+s->linesize*3-1];
1120 s->hpc.pred4x4[mode](dst, topright, linesize);
1122 AV_COPY32(ptr+4*x , copy_dst+12);
1123 AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
1124 AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
1125 AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
1128 nnz = td->non_zero_count_cache[y][x];
1131 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
1133 s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
1138 ptr += 4*s->linesize;
1143 if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1144 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
1146 mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
1148 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1149 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1151 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1152 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1153 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1154 s->filter.simple, 0);
/* Per-subpel-phase MC metadata, indexed by the 3-bit fractional position:
 * row 0 doubles as the MC function-pointer index. */
1157 static const uint8_t subpel_idx[3][8] = {
1158 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1159 // also function pointer index
1160 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1161 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1167 * @param s VP8 decoding context
1168 * @param dst target buffer for block data at block position
1169 * @param ref reference picture buffer at origin (0, 0)
1170 * @param mv motion vector (relative to block position) to get pixel data from
1171 * @param x_off horizontal position of block from origin (0, 0)
1172 * @param y_off vertical position of block from origin (0, 0)
1173 * @param block_w width of block (16, 8 or 4)
1174 * @param block_h height of block (always same as block_w)
1175 * @param width width of src/dst plane data
1176 * @param height height of src/dst plane data
1177 * @param linesize size of a single line of plane data, including padding
1178 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1180 static av_always_inline
1181 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1182 AVFrame *ref, const VP56mv *mv,
1183 int x_off, int y_off, int block_w, int block_h,
1184 int width, int height, int linesize,
1185 vp8_mc_func mc_func[3][3])
1187 uint8_t *src = ref->data[0];
1191 int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx]; // fractional phase + filter index
1192 int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
1194 x_off += mv->x >> 2; // integer part of the MV (quarter-pel units)
1195 y_off += mv->y >> 2;
1198 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0); // wait until the needed rows are decoded
1199 src += y_off * linesize + x_off;
1200 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1201 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1202 s->dsp.emulated_edge_mc(td->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
1203 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1204 x_off - mx_idx, y_off - my_idx, width, height);
1205 src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1207 mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
1209 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1210 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0); // full-pel: plain copy
1215 * chroma MC function
1217 * @param s VP8 decoding context
1218 * @param dst1 target buffer for block data at block position (U plane)
1219 * @param dst2 target buffer for block data at block position (V plane)
1220 * @param ref reference picture buffer at origin (0, 0)
1221 * @param mv motion vector (relative to block position) to get pixel data from
1222 * @param x_off horizontal position of block from origin (0, 0)
1223 * @param y_off vertical position of block from origin (0, 0)
1224 * @param block_w width of block (16, 8 or 4)
1225 * @param block_h height of block (always same as block_w)
1226 * @param width width of src/dst plane data
1227 * @param height height of src/dst plane data
1228 * @param linesize size of a single line of plane data, including padding
1229 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1231 static av_always_inline
1232 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
1233 AVFrame *ref, const VP56mv *mv, int x_off, int y_off,
1234 int block_w, int block_h, int width, int height, int linesize,
1235 vp8_mc_func mc_func[3][3])
1237 uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
/* Chroma MVs are in eighth-pel units: low 3 bits are the subpel phase,
 * the >>3 below is the integer offset within the half-resolution plane. */
1240 int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1241 int my = mv->y&7, my_idx = subpel_idx[0][my];
1243 x_off += mv->x >> 3;
1244 y_off += mv->y >> 3;
1247 src1 += y_off * linesize + x_off;
1248 src2 += y_off * linesize + x_off;
1249 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
/* Edge overlap: emulate borders separately for the U and V planes, reusing
 * the same per-thread edge buffer for each in turn. */
1250 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1251 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1252 s->dsp.emulated_edge_mc(td->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
1253 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1254 x_off - mx_idx, y_off - my_idx, width, height);
1255 src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1256 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1258 s->dsp.emulated_edge_mc(td->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
1259 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1260 x_off - mx_idx, y_off - my_idx, width, height);
1261 src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1262 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1264 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1265 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
/* Full-pel path for both chroma planes. */
1268 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1269 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1270 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
/* Motion-compensate one macroblock partition: luma first, then both chroma
 * planes at half resolution with a derived chroma MV (uvmv). */
1274 static av_always_inline
1275 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1276 AVFrame *ref_frame, int x_off, int y_off,
1277 int bx_off, int by_off,
1278 int block_w, int block_h,
1279 int width, int height, VP56mv *mv)
1284 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1285 ref_frame, mv, x_off + bx_off, y_off + by_off,
1286 block_w, block_h, width, height, s->linesize,
1287 s->put_pixels_tab[block_w == 8]);
/* NOTE(review): profile 3 presumably truncates the chroma MV to full-pel —
 * the branch body is not visible here; confirm against decode spec. */
1290 if (s->profile == 3) {
/* Halve all geometry for the 4:2:0 chroma planes. */
1294 x_off >>= 1; y_off >>= 1;
1295 bx_off >>= 1; by_off >>= 1;
1296 width >>= 1; height >>= 1;
1297 block_w >>= 1; block_h >>= 1;
1298 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1299 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1300 &uvmv, x_off + bx_off, y_off + by_off,
1301 block_w, block_h, width, height, s->uvlinesize,
1302 s->put_pixels_tab[1 + (block_w == 4)]);
1305 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1306 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1307 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1309 /* Don't prefetch refs that haven't been used very often this frame. */
1310 if (s->ref_count[ref-1] > (mb_xy >> 5)) {
1311 int x_off = mb_x << 4, y_off = mb_y << 4;
1312 int mx = (mb->mv.x>>2) + x_off + 8;
1313 int my = (mb->mv.y>>2) + y_off;
1314 uint8_t **src= s->framep[ref]->data;
1315 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1316 /* For threading, a ff_thread_await_progress here might be useful, but
1317 * it actually slows down the decoder. Since a bad prefetch doesn't
1318 * generate bad decoder output, we don't run it here. */
1319 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* Chroma: prefetch U and V together (src[2]-src[1] is the plane stride
 * between the two chroma planes). */
1320 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1321 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1326 * Apply motion vectors to prediction buffer, chapter 18.
1328 static av_always_inline
1329 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1330 VP8Macroblock *mb, int mb_x, int mb_y)
1332 int x_off = mb_x << 4, y_off = mb_y << 4;
1333 int width = 16*s->mb_width, height = 16*s->mb_height;
1334 AVFrame *ref = s->framep[mb->ref_frame];
1335 VP56mv *bmv = mb->bmv;
/* Dispatch on partitioning: whole MB, 4x4 sub-blocks, or 16x8/8x16/8x8. */
1337 switch (mb->partitioning) {
1338 case VP8_SPLITMVMODE_NONE:
1339 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1340 0, 0, 16, 16, width, height, &mb->mv);
1342 case VP8_SPLITMVMODE_4x4: {
/* Luma: one MC call per 4x4 sub-block with its own MV. */
1347 for (y = 0; y < 4; y++) {
1348 for (x = 0; x < 4; x++) {
1349 vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
1351 4*x + x_off, 4*y + y_off, 4, 4,
1352 width, height, s->linesize,
1353 s->put_pixels_tab[2]);
/* Chroma: each 4x4 chroma block covers a 2x2 group of luma sub-blocks,
 * so average the four corresponding luma MVs. */
1358 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1359 for (y = 0; y < 2; y++) {
1360 for (x = 0; x < 2; x++) {
1361 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1362 mb->bmv[ 2*y * 4 + 2*x+1].x +
1363 mb->bmv[(2*y+1) * 4 + 2*x ].x +
1364 mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1365 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1366 mb->bmv[ 2*y * 4 + 2*x+1].y +
1367 mb->bmv[(2*y+1) * 4 + 2*x ].y +
1368 mb->bmv[(2*y+1) * 4 + 2*x+1].y;
/* Divide the sum of 4 MVs by 4, rounding toward zero: the sign-bit
 * term (x >> (INT_BIT-1), i.e. -1 for negative x) corrects the
 * arithmetic shift's round-toward-negative-infinity behavior. */
1369 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1370 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
1371 if (s->profile == 3) {
1375 vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
1376 dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1377 4*x + x_off, 4*y + y_off, 4, 4,
1378 width, height, s->uvlinesize,
1379 s->put_pixels_tab[2]);
1384 case VP8_SPLITMVMODE_16x8:
1385 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1386 0, 0, 16, 8, width, height, &bmv[0]);
1387 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1388 0, 8, 16, 8, width, height, &bmv[1]);
1390 case VP8_SPLITMVMODE_8x16:
1391 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1392 0, 0, 8, 16, width, height, &bmv[0]);
1393 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1394 8, 0, 8, 16, width, height, &bmv[1]);
1396 case VP8_SPLITMVMODE_8x8:
1397 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1398 0, 0, 8, 8, width, height, &bmv[0]);
1399 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1400 8, 0, 8, 8, width, height, &bmv[1]);
1401 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1402 0, 8, 8, 8, width, height, &bmv[2]);
1403 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1404 8, 8, 8, 8, width, height, &bmv[3]);
1409 static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
1410 uint8_t *dst[3], VP8Macroblock *mb)
1414 if (mb->mode != MODE_I4x4) {
1415 uint8_t *y_dst = dst[0];
1416 for (y = 0; y < 4; y++) {
1417 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
1419 if (nnz4&~0x01010101) {
1420 for (x = 0; x < 4; x++) {
1421 if ((uint8_t)nnz4 == 1)
1422 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
1423 else if((uint8_t)nnz4 > 1)
1424 s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
1430 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1433 y_dst += 4*s->linesize;
1437 for (ch = 0; ch < 2; ch++) {
1438 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
1440 uint8_t *ch_dst = dst[1+ch];
1441 if (nnz4&~0x01010101) {
1442 for (y = 0; y < 2; y++) {
1443 for (x = 0; x < 2; x++) {
1444 if ((uint8_t)nnz4 == 1)
1445 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1446 else if((uint8_t)nnz4 > 1)
1447 s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1450 goto chroma_idct_end;
1452 ch_dst += 4*s->uvlinesize;
1455 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
/* Compute per-macroblock loop-filter parameters (VP8 spec, section 15):
 * base level from segmentation or the frame-level filter, adjusted by
 * per-ref/per-mode deltas, then an interior limit derived from sharpness. */
1462 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
1464 int interior_limit, filter_level;
1466 if (s->segmentation.enabled) {
1467 filter_level = s->segmentation.filter_level[mb->segment];
/* Segment value is a delta unless absolute_vals is set. */
1468 if (!s->segmentation.absolute_vals)
1469 filter_level += s->filter.level;
1471 filter_level = s->filter.level;
1473 if (s->lf_delta.enabled) {
1474 filter_level += s->lf_delta.ref[mb->ref_frame];
1475 filter_level += s->lf_delta.mode[mb->mode];
/* Clamp to the valid 6-bit range [0, 63]. */
1478 filter_level = av_clip_uintp2(filter_level, 6);
1480 interior_limit = filter_level;
1481 if (s->filter.sharpness) {
1482 interior_limit >>= (s->filter.sharpness + 3) >> 2;
1483 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1485 interior_limit = FFMAX(interior_limit, 1);
1487 f->filter_level = filter_level;
1488 f->inner_limit = interior_limit;
/* Inner (sub-block) edges are filtered unless the MB is a skipped
 * whole-block prediction (no residual, single MV). */
1489 f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
/* Normal (non-simple) loop filter for one macroblock: horizontal edges
 * first (left MB edge + 3 inner columns), then vertical (top MB edge +
 * 3 inner rows), on luma and both chroma planes. */
1492 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
1494 int mbedge_lim, bedge_lim, hev_thresh;
1495 int filter_level = f->filter_level;
1496 int inner_limit = f->inner_limit;
1497 int inner_filter = f->inner_filter;
1498 int linesize = s->linesize;
1499 int uvlinesize = s->uvlinesize;
/* High-edge-variance threshold table, indexed by [keyframe][filter_level];
 * interframes (row 1) use slightly lower thresholds. */
1500 static const uint8_t hev_thresh_lut[2][64] = {
1501 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1502 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1503 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1505 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1506 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1507 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* Block-edge limit for inner edges; the MB edge gets +4 slack. */
1514 bedge_lim = 2*filter_level + inner_limit;
1515 mbedge_lim = bedge_lim + 4;
1517 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
/* Left macroblock edge (luma 16 rows, chroma 8). */
1520 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
1521 mbedge_lim, inner_limit, hev_thresh);
1522 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
1523 mbedge_lim, inner_limit, hev_thresh);
/* Inner vertical edges at x = 4, 8, 12 (chroma: x = 4). */
1527 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
1528 inner_limit, hev_thresh);
1529 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
1530 inner_limit, hev_thresh);
1531 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
1532 inner_limit, hev_thresh);
1533 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
1534 uvlinesize, bedge_lim,
1535 inner_limit, hev_thresh);
/* Top macroblock edge. */
1539 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
1540 mbedge_lim, inner_limit, hev_thresh);
1541 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
1542 mbedge_lim, inner_limit, hev_thresh);
/* Inner horizontal edges at y = 4, 8, 12 (chroma: y = 4). */
1546 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
1547 linesize, bedge_lim,
1548 inner_limit, hev_thresh);
1549 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
1550 linesize, bedge_lim,
1551 inner_limit, hev_thresh);
1552 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
1553 linesize, bedge_lim,
1554 inner_limit, hev_thresh);
1555 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
1556 dst[2] + 4 * uvlinesize,
1557 uvlinesize, bedge_lim,
1558 inner_limit, hev_thresh);
/* Simple loop filter: luma plane only, no high-edge-variance test.
 * Same edge layout as filter_mb (MB edge + inner edges at 4/8/12). */
1562 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
1564 int mbedge_lim, bedge_lim;
1565 int filter_level = f->filter_level;
1566 int inner_limit = f->inner_limit;
1567 int inner_filter = f->inner_filter;
1568 int linesize = s->linesize;
1573 bedge_lim = 2*filter_level + inner_limit;
1574 mbedge_lim = bedge_lim + 4;
/* Left MB edge, then inner vertical edges. */
1577 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
1579 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
1580 s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
1581 s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
/* Top MB edge, then inner horizontal edges. */
1585 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
1587 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
1588 s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
1589 s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
/* Free queued segmentation maps. On close everything goes; otherwise one
 * still-valid map may be left behind for reuse by vp8_alloc_frame(). */
1593 static void release_queued_segmaps(VP8Context *s, int is_close)
1595 int leave_behind = is_close ? 0 : !s->maps_are_invalid;
1596 while (s->num_maps_to_be_freed > leave_behind)
1597 av_freep(&s->segmentation_maps[--s->num_maps_to_be_freed]);
1598 s->maps_are_invalid = 0;
1601 #define MARGIN (16 << 2)
/* First pass over all macroblocks decoding only modes/MVs (used when
 * mb_layout == 1, i.e. modes are stored ahead of coefficient decoding).
 * Sets up MV clamping ranges (MARGIN around the frame) per row/column. */
1602 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, AVFrame *curframe,
1603 AVFrame *prev_frame)
1605 VP8Context *s = avctx->priv_data;
1608 s->mv_min.y = -MARGIN;
1609 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1610 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
/* Row base in the (mb_width+1)-wide layout with a 1-MB guard border. */
1611 VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1612 int mb_xy = mb_y*s->mb_width;
/* Reset left intra 4x4 prediction context to DC at each row start. */
1614 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1616 s->mv_min.x = -MARGIN;
1617 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1618 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1620 AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
/* Reuse the previous frame's segmentation map when present. */
1621 decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
1622 prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 1);
/* Sliced-threading synchronization helpers. Thread progress is packed as
 * (mb_y << 16) | mb_x. check_thread_pos blocks until thread `otd` has
 * reached at least (mb_x_check, mb_y_check); update_pos publishes this
 * thread's position and wakes any waiter whose target is now satisfied.
 * The second pair of definitions below are the no-op fallbacks for builds
 * without pthread sliced threading. */
1632 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
1634 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);\
1635 if (otd->thread_mb_pos < tmp) {\
1636 pthread_mutex_lock(&otd->lock);\
1637 td->wait_mb_pos = tmp;\
1639 if (otd->thread_mb_pos >= tmp)\
1641 pthread_cond_wait(&otd->cond, &otd->lock);\
1643 td->wait_mb_pos = INT_MAX;\
1644 pthread_mutex_unlock(&otd->lock);\
1648 #define update_pos(td, mb_y, mb_x)\
1650 int pos = (mb_y << 16) | (mb_x & 0xFFFF);\
1651 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
1652 int is_null = (next_td == NULL) || (prev_td == NULL);\
1653 int pos_check = (is_null) ? 1 :\
1654 (next_td != td && pos >= next_td->wait_mb_pos) ||\
1655 (prev_td != td && pos >= prev_td->wait_mb_pos);\
1656 td->thread_mb_pos = pos;\
1657 if (sliced_threading && pos_check) {\
1658 pthread_mutex_lock(&td->lock);\
1659 pthread_cond_broadcast(&td->cond);\
1660 pthread_mutex_unlock(&td->lock);\
1664 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
1665 #define update_pos(td, mb_y, mb_x)
/* Decode one macroblock row (prediction + IDCT), without loop filtering.
 * In sliced threading, each job handles every num_jobs-th row and
 * synchronizes with its neighbor rows via check_thread_pos/update_pos. */
1668 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
1669 int jobnr, int threadnr)
1671 VP8Context *s = avctx->priv_data;
1672 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
1673 int mb_y = td->thread_mb_pos>>16;
1674 int i, y, mb_x, mb_xy = mb_y*s->mb_width;
1675 int num_jobs = s->num_jobs;
1676 AVFrame *curframe = s->curframe, *prev_frame = s->prev_frame;
/* Each partition-coded row uses a coefficient partition selected by mb_y. */
1677 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
1680 curframe->data[0] + 16*mb_y*s->linesize,
1681 curframe->data[1] + 8*mb_y*s->uvlinesize,
1682 curframe->data[2] + 8*mb_y*s->uvlinesize
/* Neighboring jobs: the one decoding the row above / below this one. */
1684 if (mb_y == 0) prev_td = td;
1685 else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1686 if (mb_y == s->mb_height-1) next_td = td;
1687 else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
1688 if (s->mb_layout == 1)
1689 mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1691 mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1692 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
1693 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
1696 memset(td->left_nnz, 0, sizeof(td->left_nnz));
1697 // left edge of 129 for intra prediction
1698 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1699 for (i = 0; i < 3; i++)
1700 for (y = 0; y < 16>>!!i; y++)
1701 dst[i][y*curframe->linesize[i]-1] = 129;
1703 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
1707 s->mv_min.x = -MARGIN;
1708 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
1710 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1711 // Wait for previous thread to read mb_x+2, and reach mb_y-1.
1712 if (prev_td != td) {
1713 if (threadnr != 0) {
1714 check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
1716 check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
1720 s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1721 s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
/* Modes may already have been decoded in the mb_layout==1 prepass. */
1724 decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
1725 prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 0);
1727 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
1730 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);
1732 if (mb->mode <= MODE_I4x4)
1733 intra_predict(s, td, dst, mb, mb_x, mb_y);
1735 inter_predict(s, td, dst, mb, mb_x, mb_y);
1737 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
1740 idct_mb(s, td, dst, mb);
/* Skipped MB: clear the non-zero-count contexts for the neighbors. */
1742 AV_ZERO64(td->left_nnz);
1743 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
1745 // Reset DC block predictors if they would exist if the mb had coefficients
1746 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
1747 td->left_nnz[8] = 0;
1748 s->top_nnz[mb_x][8] = 0;
1752 if (s->deblock_filter)
1753 filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);
/* When filtering is done by another job, back up this MB's bottom border
 * now so the filter pass can restore it for top prediction. */
1755 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
1756 if (s->filter.simple)
1757 backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1759 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1762 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
1770 if (mb_x == s->mb_width+1) {
1771 update_pos(td, mb_y, s->mb_width+3);
1773 update_pos(td, mb_y, mb_x);
/* Loop-filter one macroblock row, using the per-MB strengths computed by
 * vp8_decode_mb_row_no_filter. Mirrors its threading/synchronization. */
1778 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
1779 int jobnr, int threadnr)
1781 VP8Context *s = avctx->priv_data;
1782 VP8ThreadData *td = &s->thread_data[threadnr];
1783 int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
1784 AVFrame *curframe = s->curframe;
1786 VP8ThreadData *prev_td, *next_td;
1788 curframe->data[0] + 16*mb_y*s->linesize,
1789 curframe->data[1] + 8*mb_y*s->uvlinesize,
1790 curframe->data[2] + 8*mb_y*s->uvlinesize
1793 if (s->mb_layout == 1)
1794 mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
1796 mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
1798 if (mb_y == 0) prev_td = td;
1799 else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
1800 if (mb_y == s->mb_height-1) next_td = td;
1801 else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
1803 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
1804 VP8FilterStrength *f = &td->filter_strength[mb_x];
/* Wait until the row above is fully filtered past this column... */
1805 if (prev_td != td) {
1806 check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
/* ...and the row below has decoded past this column (filter reads it). */
1809 if (next_td != &s->thread_data[0]) {
1810 check_thread_pos(td, next_td, mb_x+1, mb_y+1);
/* Single-job case: the decode pass didn't back up borders, do it here. */
1813 if (num_jobs == 1) {
1814 if (s->filter.simple)
1815 backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
1817 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
1820 if (s->filter.simple)
1821 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
1823 filter_mb(s, dst, f, mb_x, mb_y);
/* Publish position offset by mb_width+3 so waiters can distinguish the
 * filter pass from the decode pass of the same row. */
1828 update_pos(td, mb_y, (s->mb_width+3) + mb_x);
/* Per-job entry point for avctx->execute2(): job `jobnr` decodes (and, if
 * enabled, filters) rows jobnr, jobnr+num_jobs, jobnr+2*num_jobs, ... */
1832 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
1833 int jobnr, int threadnr)
1835 VP8Context *s = avctx->priv_data;
1836 VP8ThreadData *td = &s->thread_data[jobnr];
1837 VP8ThreadData *next_td = NULL, *prev_td = NULL;
1838 AVFrame *curframe = s->curframe;
1839 int mb_y, num_jobs = s->num_jobs;
1840 td->thread_nr = threadnr;
1841 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
1842 if (mb_y >= s->mb_height) break;
/* Row number is carried to the row functions via thread_mb_pos. */
1843 td->thread_mb_pos = mb_y<<16;
1844 vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
1845 if (s->deblock_filter)
1846 vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
/* Mark the whole row complete so no waiter can block on it. */
1847 update_pos(td, mb_y, INT_MAX & 0xFFFF);
/* Frame threading: report per-row progress for consumers of this frame. */
1852 if (avctx->active_thread_type == FF_THREAD_FRAME)
1853 ff_thread_report_progress(curframe, mb_y, 0);
/* Top-level per-packet decode: parse the frame header, pick an output
 * buffer, set up reference-frame pointers for the next frame, run the
 * sliced row decoder, and hand the frame to the caller if visible. */
1859 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
1862 VP8Context *s = avctx->priv_data;
1863 int ret, i, referenced, num_jobs;
1864 enum AVDiscard skip_thresh;
1865 AVFrame *av_uninit(curframe), *prev_frame;
1867 release_queued_segmaps(s, 0);
1869 if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
1872 prev_frame = s->framep[VP56_FRAME_CURRENT];
/* A frame is "referenced" if any of last/golden/altref will point to it. */
1874 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
1875 || s->update_altref == VP56_FRAME_CURRENT;
1877 skip_thresh = !referenced ? AVDISCARD_NONREF :
1878 !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
1880 if (avctx->skip_frame >= skip_thresh) {
1882 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
1885 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
1887 // release no longer referenced frames
1888 for (i = 0; i < 5; i++)
1889 if (s->frames[i].data[0] &&
1890 &s->frames[i] != prev_frame &&
1891 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1892 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1893 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1894 vp8_release_frame(s, &s->frames[i], 1, 0);
1896 // find a free buffer
1897 for (i = 0; i < 5; i++)
1898 if (&s->frames[i] != prev_frame &&
1899 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1900 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1901 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1902 curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
/* 5 frames, 4 live references: one slot must always be free. */
1906 av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
1909 if (curframe->data[0])
1910 vp8_release_frame(s, curframe, 1, 0);
1912 // Given that arithmetic probabilities are updated every frame, it's quite likely
1913 // that the values we have on a random interframe are complete junk if we didn't
1914 // start decode on a keyframe. So just don't display anything rather than junk.
1915 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
1916 !s->framep[VP56_FRAME_GOLDEN] ||
1917 !s->framep[VP56_FRAME_GOLDEN2])) {
1918 av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
1919 ret = AVERROR_INVALIDDATA;
1923 curframe->key_frame = s->keyframe;
1924 curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1925 curframe->reference = referenced ? 3 : 0;
1926 if ((ret = vp8_alloc_frame(s, curframe))) {
1927 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
1931 // check if golden and altref are swapped
1932 if (s->update_altref != VP56_FRAME_NONE) {
1933 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
1935 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
1937 if (s->update_golden != VP56_FRAME_NONE) {
1938 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
1940 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
1942 if (s->update_last) {
1943 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
1945 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
1947 s->next_framep[VP56_FRAME_CURRENT] = curframe;
/* Reference setup done: frame threading may start the next frame now. */
1949 ff_thread_finish_setup(avctx);
1951 s->linesize = curframe->linesize[0];
1952 s->uvlinesize = curframe->linesize[1];
/* Lazily allocate per-thread edge-emulation buffers (need linesize). */
1954 if (!s->thread_data[0].edge_emu_buffer)
1955 for (i = 0; i < MAX_THREADS; i++)
1956 s->thread_data[i].edge_emu_buffer = av_malloc(21*s->linesize);
1958 memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
1959 /* Zero macroblock structures for top/top-left prediction from outside the frame. */
1961 memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
1962 if (!s->mb_layout && s->keyframe)
1963 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
1965 // top edge of 127 for intra prediction
1966 if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1967 s->top_border[0][15] = s->top_border[0][23] = 127;
1968 s->top_border[0][31] = 127;
1969 memset(s->top_border[1], 127, s->mb_width*sizeof(*s->top_border));
1971 memset(s->ref_count, 0, sizeof(s->ref_count));
1974 // Make sure the previous frame has read its segmentation map,
1975 // if we re-use the same map.
1976 if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
1977 ff_thread_await_progress(prev_frame, 1, 0);
/* mb_layout==1: decode all modes/MVs up front in a separate pass. */
1979 if (s->mb_layout == 1)
1980 vp8_decode_mv_mb_modes(avctx, curframe, prev_frame)
1982 if (avctx->active_thread_type == FF_THREAD_FRAME)
1985 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
1986 s->num_jobs = num_jobs;
1987 s->curframe = curframe;
1988 s->prev_frame = prev_frame;
1989 s->mv_min.y = -MARGIN;
1990 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1991 for (i = 0; i < MAX_THREADS; i++) {
1992 s->thread_data[i].thread_mb_pos = 0;
1993 s->thread_data[i].wait_mb_pos = INT_MAX;
1995 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);
1997 ff_thread_report_progress(curframe, INT_MAX, 0);
1998 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2001 // if future frames don't use the updated probabilities,
2002 // reset them to the values we saved
2003 if (!s->update_probabilities)
2004 s->prob[0] = s->prob[1];
/* Invisible frames (altref-only updates) produce no output picture. */
2006 if (!s->invisible) {
2007 *(AVFrame*)data = *curframe;
2013 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
/* Decoder init: fixed YUV 4:2:0 output; set up generic DSP, H.264-style
 * intra prediction helpers (shared with VP8), and the VP8 DSP functions. */
2017 static av_cold int vp8_decode_init(AVCodecContext *avctx)
2019 VP8Context *s = avctx->priv_data;
2022 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2024 ff_dsputil_init(&s->dsp, avctx);
2025 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2026 ff_vp8dsp_init(&s->vp8dsp);
/* Decoder close: flush/free all frames and buffers, then drop any
 * segmentation maps still queued for deferred freeing. */
2031 static av_cold int vp8_decode_free(AVCodecContext *avctx)
2033 vp8_decode_flush_impl(avctx, 0, 1, 1);
2034 release_queued_segmaps(avctx->priv_data, 1);
/* Per-thread init for frame threading (fresh per-thread state). */
2038 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2040 VP8Context *s = avctx->priv_data;
/* Translate a frame pointer from the source context's frames[] array into
 * the equivalent slot of the destination context (NULL stays NULL). */
2047 #define REBASE(pic) \
2048 pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
/* Frame-threading state handoff: copy probabilities, segmentation, loop
 * filter deltas and reference-frame pointers from the source thread's
 * context, rebasing frame pointers into this context's frames[] array. */
2050 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
2052 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
/* Dimension change: cached segmentation maps no longer match the frame. */
2054 if (s->macroblocks_base &&
2055 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2057 s->maps_are_invalid = 1;
2058 s->mb_width = s_src->mb_width;
2059 s->mb_height = s_src->mb_height;
/* Take the probability set the source will carry into the next frame. */
2062 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2063 s->segmentation = s_src->segmentation;
2064 s->lf_delta = s_src->lf_delta;
2065 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2067 memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
2068 s->framep[0] = REBASE(s_src->next_framep[0]);
2069 s->framep[1] = REBASE(s_src->next_framep[1]);
2070 s->framep[2] = REBASE(s_src->next_framep[2]);
2071 s->framep[3] = REBASE(s_src->next_framep[3]);
2076 AVCodec ff_vp8_decoder = {
2078 .type = AVMEDIA_TYPE_VIDEO,
2079 .id = AV_CODEC_ID_VP8,
2080 .priv_data_size = sizeof(VP8Context),
2081 .init = vp8_decode_init,
2082 .close = vp8_decode_free,
2083 .decode = vp8_decode_frame,
2084 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2085 .flush = vp8_decode_flush,
2086 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2087 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2088 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),