2 * VP7/VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Jason Garrett-Glaser
7 * Copyright (C) 2012 Daniel Kang
8 * Copyright (C) 2014 Peter Ross
10 * This file is part of FFmpeg.
12 * FFmpeg is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "libavutil/imgutils.h"
32 #include "rectangle.h"
39 #if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
40 #define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
41 #elif CONFIG_VP7_DECODER
42 #define VPX(vp7, f) vp7_ ## f
43 #else // CONFIG_VP8_DECODER
44 #define VPX(vp7, f) vp8_ ## f
47 static void free_buffers(VP8Context *s)
51 for (i = 0; i < MAX_THREADS; i++) {
53 pthread_cond_destroy(&s->thread_data[i].cond);
54 pthread_mutex_destroy(&s->thread_data[i].lock);
56 av_freep(&s->thread_data[i].filter_strength);
58 av_freep(&s->thread_data);
59 av_freep(&s->macroblocks_base);
60 av_freep(&s->intra4x4_pred_mode_top);
61 av_freep(&s->top_nnz);
62 av_freep(&s->top_border);
64 s->macroblocks = NULL;
67 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
70 if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
71 ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
73 if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
74 ff_thread_release_buffer(s->avctx, &f->tf);
75 return AVERROR(ENOMEM);
80 static void vp8_release_frame(VP8Context *s, VP8Frame *f)
82 av_buffer_unref(&f->seg_map);
83 ff_thread_release_buffer(s->avctx, &f->tf);
86 #if CONFIG_VP8_DECODER
87 static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
91 vp8_release_frame(s, dst);
93 if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
96 !(dst->seg_map = av_buffer_ref(src->seg_map))) {
97 vp8_release_frame(s, dst);
98 return AVERROR(ENOMEM);
106 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
108 VP8Context *s = avctx->priv_data;
111 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
112 vp8_release_frame(s, &s->frames[i]);
113 memset(s->framep, 0, sizeof(s->framep));
119 static void vp8_decode_flush(AVCodecContext *avctx)
121 vp8_decode_flush_impl(avctx, 0);
124 static VP8Frame * vp8_find_free_buffer(VP8Context *s)
126 VP8Frame *frame = NULL;
129 // find a free buffer
130 for (i = 0; i < 5; i++)
131 if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
132 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
133 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
134 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
135 frame = &s->frames[i];
139 av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
142 if (frame->tf.f->data[0])
143 vp8_release_frame(s, frame);
148 static int update_dimensions(VP8Context *s, int width, int height)
150 AVCodecContext *avctx = s->avctx;
153 if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
154 height != s->avctx->height) {
155 vp8_decode_flush_impl(s->avctx, 1);
157 ret = ff_set_dimensions(s->avctx, width, height);
162 s->mb_width = (s->avctx->coded_width +15) / 16;
163 s->mb_height = (s->avctx->coded_height+15) / 16;
165 s->mb_layout = s->vp7 || (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
166 if (!s->mb_layout) { // Frame threading and one thread
167 s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
168 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
170 else // Sliced threading
171 s->macroblocks_base = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
172 s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
173 s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
174 s->thread_data = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));
176 for (i = 0; i < MAX_THREADS; i++) {
177 s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
179 pthread_mutex_init(&s->thread_data[i].lock, NULL);
180 pthread_cond_init(&s->thread_data[i].cond, NULL);
184 if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
185 (!s->intra4x4_pred_mode_top && !s->mb_layout))
186 return AVERROR(ENOMEM);
188 s->macroblocks = s->macroblocks_base + 1;
194 #if CONFIG_VP8_DECODER
195 static void parse_segment_info(VP8Context *s)
197 VP56RangeCoder *c = &s->c;
200 s->segmentation.update_map = vp8_rac_get(c);
202 if (vp8_rac_get(c)) { // update segment feature data
203 s->segmentation.absolute_vals = vp8_rac_get(c);
205 for (i = 0; i < 4; i++)
206 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
208 for (i = 0; i < 4; i++)
209 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
211 if (s->segmentation.update_map)
212 for (i = 0; i < 3; i++)
213 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
216 static void update_lf_deltas(VP8Context *s)
218 VP56RangeCoder *c = &s->c;
221 for (i = 0; i < 4; i++) {
222 if (vp8_rac_get(c)) {
223 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
226 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
230 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
231 if (vp8_rac_get(c)) {
232 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
235 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
240 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
242 const uint8_t *sizes = buf;
245 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
247 buf += 3*(s->num_coeff_partitions-1);
248 buf_size -= 3*(s->num_coeff_partitions-1);
252 for (i = 0; i < s->num_coeff_partitions-1; i++) {
253 int size = AV_RL24(sizes + 3*i);
254 if (buf_size - size < 0)
257 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
261 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
267 #if CONFIG_VP7_DECODER
268 static void vp7_get_quants(VP8Context *s)
270 VP56RangeCoder *c = &s->c;
272 int yac_qi = vp8_rac_get_uint(c, 7);
273 int ydc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
274 int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
275 int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
276 int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
277 int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
279 s->qmat[0].luma_qmul[0] = vp7_ydc_qlookup[ydc_qi];
280 s->qmat[0].luma_qmul[1] = vp7_yac_qlookup[yac_qi];
281 s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
282 s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
283 s->qmat[0].chroma_qmul[0] = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
284 s->qmat[0].chroma_qmul[1] = vp7_yac_qlookup[uvac_qi];
288 #if CONFIG_VP8_DECODER
289 static void vp8_get_quants(VP8Context *s)
291 VP56RangeCoder *c = &s->c;
294 int yac_qi = vp8_rac_get_uint(c, 7);
295 int ydc_delta = vp8_rac_get_sint(c, 4);
296 int y2dc_delta = vp8_rac_get_sint(c, 4);
297 int y2ac_delta = vp8_rac_get_sint(c, 4);
298 int uvdc_delta = vp8_rac_get_sint(c, 4);
299 int uvac_delta = vp8_rac_get_sint(c, 4);
301 for (i = 0; i < 4; i++) {
302 if (s->segmentation.enabled) {
303 base_qi = s->segmentation.base_quant[i];
304 if (!s->segmentation.absolute_vals)
309 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
310 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)];
311 s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
312 /* 101581>>16 is equivalent to 155/100 */
313 s->qmat[i].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
314 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
315 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
317 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
318 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
323 * Determine which buffers golden and altref should be updated with after this frame.
324 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
326 * Intra frames update all 3 references
327 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
328 * If the update (golden|altref) flag is set, it's updated with the current frame
329 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
330 * If the flag is not set, the number read means:
332 * 1: VP56_FRAME_PREVIOUS
333 * 2: update golden with altref, or update altref with golden
335 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
337 VP56RangeCoder *c = &s->c;
340 return VP56_FRAME_CURRENT;
342 switch (vp8_rac_get_uint(c, 2)) {
344 return VP56_FRAME_PREVIOUS;
346 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
348 return VP56_FRAME_NONE;
352 static void vp78_reset_probability_tables(VP8Context *s)
355 for (i = 0; i < 4; i++)
356 for (j = 0; j < 16; j++)
357 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
358 sizeof(s->prob->token[i][j]));
361 static void vp78_update_probability_tables(VP8Context *s)
363 VP56RangeCoder *c = &s->c;
366 for (i = 0; i < 4; i++)
367 for (j = 0; j < 8; j++)
368 for (k = 0; k < 3; k++)
369 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
370 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
371 int prob = vp8_rac_get_uint(c, 8);
372 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
373 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
377 static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s)
379 VP56RangeCoder *c = &s->c;
383 for (i = 0; i < 4; i++)
384 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
386 for (i = 0; i < 3; i++)
387 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
389 // 17.2 MV probability update
390 for (i = 0; i < 2; i++)
391 for (j = 0; j < (s->vp7 ? 17 : 19); j++)
392 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
393 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
396 #if CONFIG_VP8_DECODER
397 static void update_refs(VP8Context *s)
399 VP56RangeCoder *c = &s->c;
401 int update_golden = vp8_rac_get(c);
402 int update_altref = vp8_rac_get(c);
404 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
405 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
409 #if CONFIG_VP7_DECODER
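/* VP7 fading feature: copy a plane while applying
 * out = clip(in + ((in * beta) >> 8) + alpha),
 * i.e. a contrast scale of (256 + beta) / 256 plus a brightness offset of alpha. */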
410 static void fade(uint8_t *dst, int dst_linesize, const uint8_t *src, int src_linesize, int width, int height, int alpha, int beta)
413 for (j = 0; j < height; j++)
414 for (i = 0; i < width; i++) {
415 uint8_t y = src[j*src_linesize + i];
416 dst[j*dst_linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
420 static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
422 VP56RangeCoder *c = &s->c;
423 int part1_size, hscale, vscale, i, j, ret;
424 int width = s->avctx->width;
425 int height = s->avctx->height;
427 s->profile = (buf[0]>>1) & 7;
428 if (s->profile > 1) {
429 avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
430 return AVERROR_INVALIDDATA;
433 s->keyframe = !(buf[0] & 1);
435 part1_size = AV_RL24(buf) >> 4;
437 buf += 4 - s->profile;
438 buf_size -= 4 - s->profile;
440 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
442 ff_vp56_init_range_decoder(c, buf, part1_size);
444 buf_size -= part1_size;
446 /* A. Dimension information (keyframes only) */
448 width = vp8_rac_get_uint(c, 12);
449 height = vp8_rac_get_uint(c, 12);
450 hscale = vp8_rac_get_uint(c, 2);
451 vscale = vp8_rac_get_uint(c, 2);
452 if (hscale || vscale)
453 avpriv_request_sample(s->avctx, "Upscaling");
455 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
456 vp78_reset_probability_tables(s);
457 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
458 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
459 for (i = 0; i < 2; i++)
460 memcpy(s->prob->mvc[i], vp7_mv_default_prob[i], sizeof(vp7_mv_default_prob[i]));
461 memset(&s->segmentation, 0, sizeof(s->segmentation));
462 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
463 memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
466 if (s->keyframe || s->profile > 0)
467 memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));
469 /* B. Decoding information for all four macroblock-level features */
470 for (i = 0; i < 4; i++) {
471 s->feature_enabled[i] = vp8_rac_get(c);
472 if (s->feature_enabled[i]) {
473 s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);
475 for (j = 0; j < 3; j++)
476 s->feature_index_prob[i][j] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
478 if (vp7_feature_value_size[s->profile][i])
479 for (j = 0; j < 4; j++)
480 s->feature_value[i][j] = vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
484 s->segmentation.enabled = 0;
485 s->segmentation.update_map = 0;
486 s->lf_delta.enabled = 0;
488 s->num_coeff_partitions = 1;
489 ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
491 if (!s->macroblocks_base || /* first frame */
492 width != s->avctx->width || height != s->avctx->height || (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) {
493 if ((ret = update_dimensions(s, width, height)) < 0)
497 /* C. Dequantization indices */
500 /* D. Golden frame update flag (a Flag) for interframes only */
502 s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
503 s->sign_bias[VP56_FRAME_GOLDEN] = 0;
507 s->update_probabilities = 1;
510 if (s->profile > 0) {
511 s->update_probabilities = vp8_rac_get(c);
512 if (!s->update_probabilities)
513 s->prob[1] = s->prob[0];
516 s->fade_present = vp8_rac_get(c);
519 /* E. Fading information for previous frame */
520 if (s->fade_present && vp8_rac_get(c)) {
521 int alpha = (int8_t)vp8_rac_get_uint(c, 8);
522 int beta = (int8_t)vp8_rac_get_uint(c, 8);
523 if (!s->keyframe && (alpha || beta)) {
524 /* preserve the golden frame */
525 if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
526 AVFrame *gold = s->framep[VP56_FRAME_GOLDEN]->tf.f;
530 s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
531 if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
533 prev = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
535 fade(prev->data[0], prev->linesize[0], gold->data[0], gold->linesize[0], s->mb_width * 16, s->mb_height * 16, alpha, beta);
536 for (j = 1; j < 3; j++)
537 for (i = 0; i < s->mb_height * 8; i++)
538 memcpy(prev->data[j] + i * prev->linesize[j], gold->data[j] + i * gold->linesize[j], s->mb_width * 8);
540 AVFrame *prev = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
541 fade(prev->data[0], prev->linesize[0], prev->data[0], prev->linesize[0], s->mb_width * 16, s->mb_height * 16, alpha, beta);
547 /* F. Loop filter type */
549 s->filter.simple = vp8_rac_get(c);
551 /* G. DCT coefficient ordering specification */
553 for (i = 1; i < 16; i++)
554 s->prob[0].scan[i] = zigzag_scan[vp8_rac_get_uint(c, 4)];
556 /* H. Loop filter levels */
558 s->filter.simple = vp8_rac_get(c);
559 s->filter.level = vp8_rac_get_uint(c, 6);
560 s->filter.sharpness = vp8_rac_get_uint(c, 3);
562 /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
563 vp78_update_probability_tables(s);
565 s->mbskip_enabled = 0;
567 /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
569 s->prob->intra = vp8_rac_get_uint(c, 8);
570 s->prob->last = vp8_rac_get_uint(c, 8);
571 vp78_update_pred16x16_pred8x8_mvc_probabilities(s);
578 #if CONFIG_VP8_DECODER
579 static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
581 VP56RangeCoder *c = &s->c;
582 int header_size, hscale, vscale, ret;
583 int width = s->avctx->width;
584 int height = s->avctx->height;
586 s->keyframe = !(buf[0] & 1);
587 s->profile = (buf[0]>>1) & 7;
588 s->invisible = !(buf[0] & 0x10);
589 header_size = AV_RL24(buf) >> 5;
594 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
597 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
598 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
599 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
601 if (header_size > buf_size - 7*s->keyframe) {
602 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
603 return AVERROR_INVALIDDATA;
607 if (AV_RL24(buf) != 0x2a019d) {
608 av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
609 return AVERROR_INVALIDDATA;
611 width = AV_RL16(buf+3) & 0x3fff;
612 height = AV_RL16(buf+5) & 0x3fff;
613 hscale = buf[4] >> 6;
614 vscale = buf[6] >> 6;
618 if (hscale || vscale)
619 avpriv_request_sample(s->avctx, "Upscaling");
621 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
622 vp78_reset_probability_tables(s);
623 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
624 memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
625 memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc));
626 memset(&s->segmentation, 0, sizeof(s->segmentation));
627 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
630 ff_vp56_init_range_decoder(c, buf, header_size);
632 buf_size -= header_size;
636 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
637 vp8_rac_get(c); // whether we can skip clamping in dsp functions
640 if ((s->segmentation.enabled = vp8_rac_get(c)))
641 parse_segment_info(s);
643 s->segmentation.update_map = 0; // FIXME: move this to some init function?
645 s->filter.simple = vp8_rac_get(c);
646 s->filter.level = vp8_rac_get_uint(c, 6);
647 s->filter.sharpness = vp8_rac_get_uint(c, 3);
649 if ((s->lf_delta.enabled = vp8_rac_get(c)))
653 if (setup_partitions(s, buf, buf_size)) {
654 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
655 return AVERROR_INVALIDDATA;
658 if (!s->macroblocks_base || /* first frame */
659 width != s->avctx->width || height != s->avctx->height || (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) {
660 if ((ret = update_dimensions(s, width, height)) < 0)
668 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
669 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
672 // if we aren't saving this frame's probabilities for future frames,
673 // make a copy of the current probabilities
674 if (!(s->update_probabilities = vp8_rac_get(c)))
675 s->prob[1] = s->prob[0];
677 s->update_last = s->keyframe || vp8_rac_get(c);
679 vp78_update_probability_tables(s);
681 if ((s->mbskip_enabled = vp8_rac_get(c)))
682 s->prob->mbskip = vp8_rac_get_uint(c, 8);
685 s->prob->intra = vp8_rac_get_uint(c, 8);
686 s->prob->last = vp8_rac_get_uint(c, 8);
687 s->prob->golden = vp8_rac_get_uint(c, 8);
688 vp78_update_pred16x16_pred8x8_mvc_probabilities(s);
695 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
697 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
698 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
702 * Motion vector coding, 17.1.
704 static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
708 if (vp56_rac_get_prob_branchy(c, p[0])) {
711 for (i = 0; i < 3; i++)
712 x += vp56_rac_get_prob(c, p[9 + i]) << i;
713 for (i = (vp7 ? 7 : 9); i > 3; i--)
714 x += vp56_rac_get_prob(c, p[9 + i]) << i;
715 if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
719 const uint8_t *ps = p+2;
720 bit = vp56_rac_get_prob(c, *ps);
723 bit = vp56_rac_get_prob(c, *ps);
726 x += vp56_rac_get_prob(c, *ps);
729 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
732 static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
734 return read_mv_component(c, p, 1);
737 static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
739 return read_mv_component(c, p, 0);
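/* Choose the sub-MV mode probability set from the left and above sub-MVs
 * (whether they are equal and/or zero), as used for split-MV coding. */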
742 static av_always_inline
743 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
746 return vp8_submv_prob[4-!!left];
748 return vp8_submv_prob[2];
749 return vp8_submv_prob[1-!!left];
753 * Split motion vector prediction, 16.4.
754 * @returns the number of motion vectors parsed (2, 4 or 16)
756 static av_always_inline
757 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout, int vp7)
761 VP8Macroblock *top_mb;
762 VP8Macroblock *left_mb = &mb[-1];
763 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
765 *mbsplits_cur, *firstidx;
767 VP56mv *left_mv = left_mb->bmv;
768 VP56mv *cur_mv = mb->bmv;
770 if (!layout) // layout is inlined, s->mb_layout is not
773 top_mb = &mb[-s->mb_width-1];
774 mbsplits_top = vp8_mbsplits[top_mb->partitioning];
775 top_mv = top_mb->bmv;
777 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
778 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
779 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
781 part_idx = VP8_SPLITMVMODE_8x8;
784 part_idx = VP8_SPLITMVMODE_4x4;
787 num = vp8_mbsplit_count[part_idx];
788 mbsplits_cur = vp8_mbsplits[part_idx];
789 firstidx = vp8_mbfirstidx[part_idx];
790 mb->partitioning = part_idx;
792 for (n = 0; n < num; n++) {
794 uint32_t left, above;
795 const uint8_t *submv_prob;
798 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
800 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
802 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
804 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
806 submv_prob = vp7 ? vp7_submv_prob : get_submv_prob(left, above);
808 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
809 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
810 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
811 mb->bmv[n].y = mb->mv.y + VPX(vp7, read_mv_component)(c, s->prob->mvc[0]);
812 mb->bmv[n].x = mb->mv.x + VPX(vp7, read_mv_component)(c, s->prob->mvc[1]);
814 AV_ZERO32(&mb->bmv[n]);
817 AV_WN32A(&mb->bmv[n], above);
820 AV_WN32A(&mb->bmv[n], left);
828 * the vp7 reference decoder uses a padding macroblock column (added to the right
829 * edge of the frame) to guard against illegal macroblock offsets. The algorithm
830 * has bugs that permit offsets to straddle the padding column. This function
831 * replicates those bugs.
832 * @param[out] edge_x macroblock x address
833 * @param[out] edge_y macroblock y address
834 * @return macroblock offset legal (boolean)
836 static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width, int xoffset, int yoffset, int boundary, int *edge_x, int *edge_y)
838 int vwidth = mb_width + 1;
839 int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
840 if (new < boundary || new % vwidth == vwidth - 1)
842 *edge_y = new / vwidth;
843 *edge_x = new % vwidth;
847 static const VP56mv * get_bmv_ptr(const VP8Macroblock *mb, int subblock)
849 return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
852 static av_always_inline
853 void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
855 VP8Macroblock *mb_edge[12];
856 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
857 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
860 uint8_t cnt[3] = { 0 };
861 VP56RangeCoder *c = &s->c;
864 AV_ZERO32(&near_mv[0]);
865 AV_ZERO32(&near_mv[1]);
866 AV_ZERO32(&near_mv[2]);
868 for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
869 const VP7MVPred * pred = &vp7_mv_pred[i];
872 if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset, pred->yoffset, !s->profile, &edge_x, &edge_y)) {
873 VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1) ?
874 s->macroblocks_base + (s->mb_width+1)*(edge_y + 1) + 1 + edge_x :
875 s->macroblocks + (s->mb_height - edge_y - 1)*2 + edge_x;
876 uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
878 if (AV_RN32A(&near_mv[CNT_NEAREST])) {
879 if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
881 } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
882 if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
886 AV_WN32A(&near_mv[CNT_NEAR], mv);
890 AV_WN32A(&near_mv[CNT_NEAREST], mv);
899 cnt[idx] += vp7_mv_pred[i].score;
902 mb->partitioning = VP8_SPLITMVMODE_NONE;
904 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
905 mb->mode = VP8_MVMODE_MV;
907 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
909 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
911 if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
912 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
914 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR] ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
916 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
917 mb->mode = VP8_MVMODE_SPLIT;
918 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, 1) - 1];
920 mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
921 mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
925 mb->mv = near_mv[CNT_NEAR];
929 mb->mv = near_mv[CNT_NEAREST];
933 mb->mode = VP8_MVMODE_ZERO;
939 static av_always_inline
940 void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
942 VP8Macroblock *mb_edge[3] = { 0 /* top */,
945 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
946 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
948 int cur_sign_bias = s->sign_bias[mb->ref_frame];
949 int8_t *sign_bias = s->sign_bias;
951 uint8_t cnt[4] = { 0 };
952 VP56RangeCoder *c = &s->c;
954 if (!layout) { // layout is inlined (s->mb_layout is not)
959 mb_edge[0] = mb - s->mb_width-1;
960 mb_edge[2] = mb - s->mb_width-2;
963 AV_ZERO32(&near_mv[0]);
964 AV_ZERO32(&near_mv[1]);
965 AV_ZERO32(&near_mv[2]);
967 /* Process MB on top, left and top-left */
968 #define MV_EDGE_CHECK(n)\
970 VP8Macroblock *edge = mb_edge[n];\
971 int edge_ref = edge->ref_frame;\
972 if (edge_ref != VP56_FRAME_CURRENT) {\
973 uint32_t mv = AV_RN32A(&edge->mv);\
975 if (cur_sign_bias != sign_bias[edge_ref]) {\
976 /* SWAR negate of the values in mv. */\
978 mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
980 if (!n || mv != AV_RN32A(&near_mv[idx]))\
981 AV_WN32A(&near_mv[++idx], mv);\
982 cnt[idx] += 1 + (n != 2);\
984 cnt[CNT_ZERO] += 1 + (n != 2);\
992 mb->partitioning = VP8_SPLITMVMODE_NONE;
993 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
994 mb->mode = VP8_MVMODE_MV;
996 /* If we have three distinct MVs, merge first and last if they're the same */
997 if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
998 cnt[CNT_NEAREST] += 1;
1000 /* Swap near and nearest if necessary */
1001 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
1002 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
1003 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
1006 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
1007 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
1009 /* Choose the best mv out of 0,0 and the nearest mv */
1010 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
1011 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
1012 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
1013 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
1015 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
1016 mb->mode = VP8_MVMODE_SPLIT;
1017 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, 0) - 1];
1019 mb->mv.y += vp8_read_mv_component(c, s->prob->mvc[0]);
1020 mb->mv.x += vp8_read_mv_component(c, s->prob->mvc[1]);
1021 mb->bmv[0] = mb->mv;
1024 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
1025 mb->bmv[0] = mb->mv;
1028 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
1029 mb->bmv[0] = mb->mv;
1032 mb->mode = VP8_MVMODE_ZERO;
1034 mb->bmv[0] = mb->mv;
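/* Decode the 16 per-block intra prediction modes of an I4x4 macroblock.
 * On keyframes each 4x4 mode is coded with a context taken from the modes
 * above and to the left; on inter frames a single fixed probability set is used. */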
1038 static av_always_inline
1039 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
1040 int mb_x, int keyframe, int layout)
1042 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1045 VP8Macroblock *mb_top = mb - s->mb_width - 1;
1046 memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
1051 uint8_t* const left = s->intra4x4_pred_mode_left;
1053 top = mb->intra4x4_pred_mode_top;
1055 top = s->intra4x4_pred_mode_top + 4 * mb_x;
1056 for (y = 0; y < 4; y++) {
1057 for (x = 0; x < 4; x++) {
1059 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
1060 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
1061 left[y] = top[x] = *intra4x4;
1067 for (i = 0; i < 16; i++)
1068 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
1072 static const char * vp7_feature_name[] = { "q-index", "lf-delta", "partial-golden-update", "blit-pitch" };
1074 static av_always_inline
1075 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1076 uint8_t *segment, uint8_t *ref, int layout, int vp7)
1078 VP56RangeCoder *c = &s->c;
1083 for (i = 0; i < 4; i++) {
1084 if (s->feature_enabled[i]) {
1085 if (vp56_rac_get_prob(c, s->feature_present_prob[i])) {
1086 int index = vp8_rac_get_tree(c, vp7_feature_index_tree, s->feature_index_prob[i]);
1087 av_log(s->avctx, AV_LOG_WARNING, "Feature %s present in macroblock (value 0x%x)\n", vp7_feature_name[i], s->feature_value[i][index]);
1092 if (s->segmentation.update_map) {
1093 int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
1094 *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
1095 } else if (s->segmentation.enabled)
1096 *segment = ref ? *ref : *segment;
1098 mb->segment = *segment;
1100 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
1103 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
1105 if (mb->mode == MODE_I4x4) {
1106 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
1108 const uint32_t modes = VPX(vp7, pred4x4_mode)[mb->mode] * 0x01010101u;
1110 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
1112 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
1113 AV_WN32A( s->intra4x4_pred_mode_left, modes);
1116 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
1117 mb->ref_frame = VP56_FRAME_CURRENT;
1118 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
1120 if (vp56_rac_get_prob_branchy(c, s->prob->last))
1121 mb->ref_frame = (!vp7 && vp56_rac_get_prob(c, s->prob->golden)) ?
1122 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
1124 mb->ref_frame = VP56_FRAME_PREVIOUS;
1125 s->ref_count[mb->ref_frame-1]++;
1127 // motion vectors, 16.3
1129 vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
1131 vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
1134 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
1136 if (mb->mode == MODE_I4x4)
1137 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
1139 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
1140 mb->ref_frame = VP56_FRAME_CURRENT;
1141 mb->partitioning = VP8_SPLITMVMODE_NONE;
1142 AV_ZERO32(&mb->bmv[0]);
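/* VP7 only: predict the DC coefficient of an inter macroblock's luma DC block
 * from a running per-reference predictor (pred[0] holds the value, pred[1] a
 * repeat counter); the exact counter semantics follow the VP7 reference decoder. */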
1146 static av_always_inline int inter_predict_dc(int16_t block[16], int16_t pred[2])
1148 int16_t dc = block[0];
1156 if (!pred[0] || !dc || FFSIGN(pred[0]) != FFSIGN(dc)) {
1157 block[0] = pred[0] = dc;
1162 block[0] = pred[0] = dc;
1169 * @param r arithmetic bitstream reader context
1170 * @param block destination for block coefficients
1171 * @param probs probabilities to use when reading trees from the bitstream
1172 * @param i initial coeff index, 0 unless a separate DC block is coded
1173 * @param qmul array holding the dc/ac dequant factor at position 0/1
1174 * @return 0 if no coeffs were decoded
1175 * otherwise, the index of the last coeff decoded plus one
1177 static av_always_inline
1178 int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
1179 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
1180 int i, uint8_t *token_prob, int16_t qmul[2],
1181 const uint8_t scan[16], int vp7)
1183 VP56RangeCoder c = *r;
1188 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
1192 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
1194 break; // invalid input; blocks should end with EOB
1195 token_prob = probs[i][0];
1201 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
1203 token_prob = probs[i+1][1];
1205 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
1206 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
1208 coeff += vp56_rac_get_prob(&c, token_prob[5]);
1212 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
1213 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
1214 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
1215 } else { // DCT_CAT2
1217 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
1218 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
1220 } else { // DCT_CAT3 and up
1221 int a = vp56_rac_get_prob(&c, token_prob[8]);
1222 int b = vp56_rac_get_prob(&c, token_prob[9+a]);
1223 int cat = (a<<1) + b;
1224 coeff = 3 + (8<<cat);
1225 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
1228 token_prob = probs[i+1][2];
1230 block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
1237 static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
1238 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
1239 int i, uint8_t *token_prob, int16_t qmul[2],
1240 const uint8_t scan[16])
1242 return decode_block_coeffs_internal(r, block, probs, i, token_prob, qmul, scan, 1);
1245 #ifndef vp8_decode_block_coeffs_internal
1246 static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
1247 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
1248 int i, uint8_t *token_prob, int16_t qmul[2])
1250 return decode_block_coeffs_internal(r, block, probs, i, token_prob, qmul, zigzag_scan, 0);
1255 * @param c arithmetic bitstream reader context
1256 * @param block destination for block coefficients
1257 * @param probs probabilities to use when reading trees from the bitstream
1258 * @param i initial coeff index, 0 unless a separate DC block is coded
1259 * @param zero_nhood the initial prediction context for number of surrounding
1260 * all-zero blocks (only left/top, so 0-2)
1261 * @param qmul array holding the dc/ac dequant factor at position 0/1
1262 * @param scan scan pattern (VP7 only)
1263 * @return 0 if no coeffs were decoded
1264 * otherwise, the index of the last coeff decoded plus one
1266 static av_always_inline
1267 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
1268 uint8_t probs[16][3][NUM_DCT_TOKENS-1],
1269 int i, int zero_nhood, int16_t qmul[2],
1270 const uint8_t scan[16], int vp7)
1272 uint8_t *token_prob = probs[i][zero_nhood];
1273 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
1275 return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul, scan)
1276 : vp8_decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
1279 static av_always_inline
1280 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
1281 uint8_t t_nnz[9], uint8_t l_nnz[9], int vp7)
1283 int i, x, y, luma_start = 0, luma_ctx = 3;
1284 int nnz_pred, nnz, nnz_total = 0;
1285 int segment = mb->segment;
1288 if (mb->mode != MODE_I4x4 && (vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1289 nnz_pred = t_nnz[8] + l_nnz[8];
1291 // decode DC values and do hadamard
1292 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
1293 s->qmat[segment].luma_dc_qmul, zigzag_scan, vp7);
1294 l_nnz[8] = t_nnz[8] = !!nnz;
1296 if (vp7 && mb->mode > MODE_I4x4)
1297 nnz |= inter_predict_dc(td->block_dc, s->inter_dc_pred[mb->ref_frame - 1]);
1303 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1305 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
1312 for (y = 0; y < 4; y++)
1313 for (x = 0; x < 4; x++) {
1314 nnz_pred = l_nnz[y] + t_nnz[x];
1315 nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
1316 nnz_pred, s->qmat[segment].luma_qmul, s->prob[0].scan, vp7);
1317 // nnz+block_dc may be one more than the actual last index, but we don't care
1318 td->non_zero_count_cache[y][x] = nnz + block_dc;
1319 t_nnz[x] = l_nnz[y] = !!nnz;
1324 // TODO: what to do about dimensions? 2nd dim for luma is x,
1325 // but for chroma it's (y<<1)|x
1326 for (i = 4; i < 6; i++)
1327 for (y = 0; y < 2; y++)
1328 for (x = 0; x < 2; x++) {
1329 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
1330 nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
1331 nnz_pred, s->qmat[segment].chroma_qmul, s->prob[0].scan, vp7);
1332 td->non_zero_count_cache[i][(y<<1)+x] = nnz;
1333 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
1337 // if there were no coded coeffs despite the macroblock not being marked skip,
1338 // we MUST not do the inner loop filter and should not do IDCT
1339 // Since skip isn't used for bitstream prediction, just manually set it.
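/* backup_mb_border() saves the bottom line of a just-decoded macroblock (before
 * loop filtering) as the top border for the row below; xchg_mb_border() swaps
 * that saved, unfiltered border in and out around intra prediction. */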
1344 static av_always_inline
1345 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
1346 int linesize, int uvlinesize, int simple)
1348 AV_COPY128(top_border, src_y + 15*linesize);
1350 AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
1351 AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
1355 static av_always_inline
1356 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
1357 int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
1358 int simple, int xchg)
1360 uint8_t *top_border_m1 = top_border-32; // for TL prediction
1362 src_cb -= uvlinesize;
1363 src_cr -= uvlinesize;
1365 #define XCHG(a,b,xchg) do { \
1366 if (xchg) AV_SWAP64(b,a); \
1367 else AV_COPY64(b,a); \
1370 XCHG(top_border_m1+8, src_y-8, xchg);
1371 XCHG(top_border, src_y, xchg);
1372 XCHG(top_border+8, src_y+8, 1);
1373 if (mb_x < mb_width-1)
1374 XCHG(top_border+32, src_y+16, 1);
1376 // only copy chroma for normal loop filter
1377 // or to initialize the top row to 127
1378 if (!simple || !mb_y) {
1379 XCHG(top_border_m1+16, src_cb-8, xchg);
1380 XCHG(top_border_m1+24, src_cr-8, xchg);
1381 XCHG(top_border+16, src_cb, 1);
1382 XCHG(top_border+24, src_cr, 1);
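/* The helpers below remap intra prediction modes near the frame borders, where
 * missing top/left neighbours must be replaced by DC/fixed-value fallbacks
 * (VP7 and VP8 differ in the fill value: 128 vs. 127/129). */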
1386 static av_always_inline
1387 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1390 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1392 return mb_y ? mode : LEFT_DC_PRED8x8;
1396 static av_always_inline
1397 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1400 return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1402 return mb_y ? mode : HOR_PRED8x8;
1406 static av_always_inline
1407 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1411 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1413 return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1415 return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1416 case PLANE_PRED8x8 /*TM*/:
1417 return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
1422 static av_always_inline
1423 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1426 return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1428 return mb_y ? mode : HOR_VP8_PRED;
1432 static av_always_inline
1433 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf, int vp7)
1437 if (!mb_x && mb_y) {
1442 case DIAG_DOWN_LEFT_PRED:
1443 case VERT_LEFT_PRED:
1444 return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1452 return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1454 return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1455 case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
1456 case DIAG_DOWN_RIGHT_PRED:
1457 case VERT_RIGHT_PRED:
1466 static av_always_inline
1467 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1468 VP8Macroblock *mb, int mb_x, int mb_y, int vp7)
1470 int x, y, mode, nnz;
1473 // for the first row, we need to run xchg_mb_border to init the top edge to 127
1474 // otherwise, skip it if we aren't going to deblock
1475 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1476 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1477 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1478 s->filter.simple, 1);
1480 if (mb->mode < MODE_I4x4) {
1481 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, vp7);
1482 s->hpc.pred16x16[mode](dst[0], s->linesize);
1484 uint8_t *ptr = dst[0];
1485 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1486 const uint8_t lo = vp7 ? 128 : 127;
1487 const uint8_t hi = vp7 ? 128 : 129;
1488 uint8_t tr_top[4] = { lo, lo, lo, lo };
1490 // all blocks on the right edge of the macroblock use the bottom edge of
1491 // the top macroblock for their topright edge
1492 uint8_t *tr_right = ptr - s->linesize + 16;
1494 // if we're on the right edge of the frame, said edge is extended
1495 // from the top macroblock
1497 mb_x == s->mb_width-1) {
1498 tr = tr_right[-1]*0x01010101u;
1499 tr_right = (uint8_t *)&tr;
1503 AV_ZERO128(td->non_zero_count_cache);
1505 for (y = 0; y < 4; y++) {
1506 uint8_t *topright = ptr + 4 - s->linesize;
1507 for (x = 0; x < 4; x++) {
1508 int copy = 0, linesize = s->linesize;
1509 uint8_t *dst = ptr+4*x;
1510 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
1512 if ((y == 0 || x == 3) && mb_y == 0) {
1515 topright = tr_right;
1517 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy, vp7);
1519 dst = copy_dst + 12;
1523 AV_WN32A(copy_dst+4, lo * 0x01010101U);
1525 AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
1529 copy_dst[3] = ptr[4*x-s->linesize-1];
1538 copy_dst[11] = ptr[4*x -1];
1539 copy_dst[19] = ptr[4*x+s->linesize -1];
1540 copy_dst[27] = ptr[4*x+s->linesize*2-1];
1541 copy_dst[35] = ptr[4*x+s->linesize*3-1];
1544 s->hpc.pred4x4[mode](dst, topright, linesize);
1546 AV_COPY32(ptr+4*x , copy_dst+12);
1547 AV_COPY32(ptr+4*x+s->linesize , copy_dst+20);
1548 AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
1549 AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
1552 nnz = td->non_zero_count_cache[y][x];
1555 s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
1557 s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
1562 ptr += 4*s->linesize;
1567 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y, vp7);
1568 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1569 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1571 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1572 xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1573 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1574 s->filter.simple, 0);
1577 static const uint8_t subpel_idx[3][8] = {
1578 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1579 // also function pointer index
1580 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1581 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1587 * @param s VP8 decoding context
1588 * @param dst target buffer for block data at block position
1589 * @param ref reference picture buffer at origin (0, 0)
1590 * @param mv motion vector (relative to block position) to get pixel data from
1591 * @param x_off horizontal position of block from origin (0, 0)
1592 * @param y_off vertical position of block from origin (0, 0)
1593 * @param block_w width of block (16, 8 or 4)
1594 * @param block_h height of block (always same as block_w)
1595 * @param width width of src/dst plane data
1596 * @param height height of src/dst plane data
1597 * @param linesize size of a single line of plane data, including padding
1598 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1600 static av_always_inline
1601 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1602 ThreadFrame *ref, const VP56mv *mv,
1603 int x_off, int y_off, int block_w, int block_h,
1604 int width, int height, ptrdiff_t linesize,
1605 vp8_mc_func mc_func[3][3])
1607 uint8_t *src = ref->f->data[0];
1610 int src_linesize = linesize;
1612 int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1613 int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
1615 x_off += mv->x >> 2;
1616 y_off += mv->y >> 2;
1619 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1620 src += y_off * linesize + x_off;
1621 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1622 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1623 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1624 src - my_idx * linesize - mx_idx,
1625 EDGE_EMU_LINESIZE, linesize,
1626 block_w + subpel_idx[1][mx],
1627 block_h + subpel_idx[1][my],
1628 x_off - mx_idx, y_off - my_idx, width, height);
1629 src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1630 src_linesize = EDGE_EMU_LINESIZE;
1632 mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
1634 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1635 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
1640 * chroma MC function
1642 * @param s VP8 decoding context
1643 * @param dst1 target buffer for block data at block position (U plane)
1644 * @param dst2 target buffer for block data at block position (V plane)
1645 * @param ref reference picture buffer at origin (0, 0)
1646 * @param mv motion vector (relative to block position) to get pixel data from
1647 * @param x_off horizontal position of block from origin (0, 0)
1648 * @param y_off vertical position of block from origin (0, 0)
1649 * @param block_w width of block (16, 8 or 4)
1650 * @param block_h height of block (always same as block_w)
1651 * @param width width of src/dst plane data
1652 * @param height height of src/dst plane data
1653 * @param linesize size of a single line of plane data, including padding
1654 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1656 static av_always_inline
1657 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
1658 ThreadFrame *ref, const VP56mv *mv, int x_off, int y_off,
1659 int block_w, int block_h, int width, int height, ptrdiff_t linesize,
1660 vp8_mc_func mc_func[3][3])
1662 uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1665 int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1666 int my = mv->y&7, my_idx = subpel_idx[0][my];
1668 x_off += mv->x >> 3;
1669 y_off += mv->y >> 3;
1672 src1 += y_off * linesize + x_off;
1673 src2 += y_off * linesize + x_off;
1674 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1675 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1676 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1677 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1678 src1 - my_idx * linesize - mx_idx,
1679 EDGE_EMU_LINESIZE, linesize,
1680 block_w + subpel_idx[1][mx],
1681 block_h + subpel_idx[1][my],
1682 x_off - mx_idx, y_off - my_idx, width, height);
1683 src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1684 mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1686 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1687 src2 - my_idx * linesize - mx_idx,
1688 EDGE_EMU_LINESIZE, linesize,
1689 block_w + subpel_idx[1][mx],
1690 block_h + subpel_idx[1][my],
1691 x_off - mx_idx, y_off - my_idx, width, height);
1692 src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1693 mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1695 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1696 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1699 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1700 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1701 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1705 static av_always_inline
1706 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1707 ThreadFrame *ref_frame, int x_off, int y_off,
1708 int bx_off, int by_off,
1709 int block_w, int block_h,
1710 int width, int height, VP56mv *mv)
1715 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1716 ref_frame, mv, x_off + bx_off, y_off + by_off,
1717 block_w, block_h, width, height, s->linesize,
1718 s->put_pixels_tab[block_w == 8]);
1721 if (s->profile == 3) { /* this block only applies to VP8; it is safe to check only the profile, as VP7 profile <= 1 */
1725 x_off >>= 1; y_off >>= 1;
1726 bx_off >>= 1; by_off >>= 1;
1727 width >>= 1; height >>= 1;
1728 block_w >>= 1; block_h >>= 1;
1729 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1730 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1731 &uvmv, x_off + bx_off, y_off + by_off,
1732 block_w, block_h, width, height, s->uvlinesize,
1733 s->put_pixels_tab[1 + (block_w == 4)]);
1736 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1737 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1738 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1740 /* Don't prefetch refs that haven't been used very often this frame. */
1741 if (s->ref_count[ref-1] > (mb_xy >> 5)) {
1742 int x_off = mb_x << 4, y_off = mb_y << 4;
1743 int mx = (mb->mv.x>>2) + x_off + 8;
1744 int my = (mb->mv.y>>2) + y_off;
1745 uint8_t **src= s->framep[ref]->tf.f->data;
1746 int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1747 /* For threading, a ff_thread_await_progress here might be useful, but
1748 * it actually slows down the decoder. Since a bad prefetch doesn't
1749 * generate bad decoder output, we don't run it here. */
1750 s->vdsp.prefetch(src[0]+off, s->linesize, 4);
1751 off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1752 s->vdsp.prefetch(src[1]+off, src[2]-src[1], 2);
1757 * Apply motion vectors to prediction buffer, chapter 18.
1759 static av_always_inline
1760 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1761 VP8Macroblock *mb, int mb_x, int mb_y)
1763 int x_off = mb_x << 4, y_off = mb_y << 4;
1764 int width = 16*s->mb_width, height = 16*s->mb_height;
1765 ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1766 VP56mv *bmv = mb->bmv;
1768 switch (mb->partitioning) {
1769 case VP8_SPLITMVMODE_NONE:
1770 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1771 0, 0, 16, 16, width, height, &mb->mv);
1773 case VP8_SPLITMVMODE_4x4: {
1778 for (y = 0; y < 4; y++) {
1779 for (x = 0; x < 4; x++) {
1780 vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
1782 4*x + x_off, 4*y + y_off, 4, 4,
1783 width, height, s->linesize,
1784 s->put_pixels_tab[2]);
1789 x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1790 for (y = 0; y < 2; y++) {
1791 for (x = 0; x < 2; x++) {
1792 uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x +
1793 mb->bmv[ 2*y * 4 + 2*x+1].x +
1794 mb->bmv[(2*y+1) * 4 + 2*x ].x +
1795 mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1796 uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y +
1797 mb->bmv[ 2*y * 4 + 2*x+1].y +
1798 mb->bmv[(2*y+1) * 4 + 2*x ].y +
1799 mb->bmv[(2*y+1) * 4 + 2*x+1].y;
1800 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1801 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
1802 if (s->profile == 3) {
1806 vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
1807 dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1808 4*x + x_off, 4*y + y_off, 4, 4,
1809 width, height, s->uvlinesize,
1810 s->put_pixels_tab[2]);
1815 case VP8_SPLITMVMODE_16x8:
1816 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1817 0, 0, 16, 8, width, height, &bmv[0]);
1818 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1819 0, 8, 16, 8, width, height, &bmv[1]);
1821 case VP8_SPLITMVMODE_8x16:
1822 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1823 0, 0, 8, 16, width, height, &bmv[0]);
1824 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1825 8, 0, 8, 16, width, height, &bmv[1]);
1827 case VP8_SPLITMVMODE_8x8:
1828 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1829 0, 0, 8, 8, width, height, &bmv[0]);
1830 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1831 8, 0, 8, 8, width, height, &bmv[1]);
1832 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1833 0, 8, 8, 8, width, height, &bmv[2]);
1834 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1835 8, 8, 8, 8, width, height, &bmv[3]);
1840 static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
1841 uint8_t *dst[3], VP8Macroblock *mb)
1845 if (mb->mode != MODE_I4x4) {
1846 uint8_t *y_dst = dst[0];
1847 for (y = 0; y < 4; y++) {
1848 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
1850 if (nnz4&~0x01010101) {
1851 for (x = 0; x < 4; x++) {
1852 if ((uint8_t)nnz4 == 1)
1853 s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
1854 else if ((uint8_t)nnz4 > 1)
1855 s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
1861 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1864 y_dst += 4*s->linesize;
1868 for (ch = 0; ch < 2; ch++) {
1869 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
1871 uint8_t *ch_dst = dst[1+ch];
1872 if (nnz4&~0x01010101) {
1873 for (y = 0; y < 2; y++) {
1874 for (x = 0; x < 2; x++) {
1875 if ((uint8_t)nnz4 == 1)
1876 s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1877 else if ((uint8_t)nnz4 > 1)
1878 s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1881 goto chroma_idct_end;
1883 ch_dst += 4*s->uvlinesize;
1886 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
1893 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f, int vp7)
1895 int interior_limit, filter_level;
1897 if (s->segmentation.enabled) {
1898 filter_level = s->segmentation.filter_level[mb->segment];
1899 if (!s->segmentation.absolute_vals)
1900 filter_level += s->filter.level;
1902 filter_level = s->filter.level;
1904 if (s->lf_delta.enabled) {
1905 filter_level += s->lf_delta.ref[mb->ref_frame];
1906 filter_level += s->lf_delta.mode[mb->mode];
1909 filter_level = av_clip_uintp2(filter_level, 6);
1911 interior_limit = filter_level;
1912 if (s->filter.sharpness) {
1913 interior_limit >>= (s->filter.sharpness + 3) >> 2;
1914 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1916 interior_limit = FFMAX(interior_limit, 1);
1918 f->filter_level = filter_level;
1919 f->inner_limit = interior_limit;
1920 f->inner_filter = vp7 || !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
1923 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y, int vp7)
1925 int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
1926 int filter_level = f->filter_level;
1927 int inner_limit = f->inner_limit;
1928 int inner_filter = f->inner_filter;
1929 int linesize = s->linesize;
1930 int uvlinesize = s->uvlinesize;
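    /* High-edge-variance threshold as a function of loop filter level,
     * indexed by s->keyframe (row 0: inter frames, row 1: key frames). */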
1931 static const uint8_t hev_thresh_lut[2][64] = {
1932 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1933 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1934 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1936 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1937 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1938 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1946 bedge_lim_y = filter_level;
1947 bedge_lim_uv = 2*filter_level;
1948 mbedge_lim = filter_level + 2;
1951 bedge_lim_uv = 2*filter_level + inner_limit;
1952 mbedge_lim = bedge_lim_y + 4;
1955 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
1958 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
1959 mbedge_lim, inner_limit, hev_thresh);
1960 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
1961 mbedge_lim, inner_limit, hev_thresh);
1964 #define H_LOOP_FILTER_16Y_INNER(cond) \
1965 if (cond && inner_filter) {\
1966 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim_y,\
1967 inner_limit, hev_thresh);\
1968 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim_y,\
1969 inner_limit, hev_thresh);\
1970 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim_y,\
1971 inner_limit, hev_thresh);\
1972 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,\
1973 uvlinesize, bedge_lim_uv,\
1974 inner_limit, hev_thresh);\
1977 H_LOOP_FILTER_16Y_INNER(!vp7)

    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize, bedge_lim_uv,
                                             inner_limit, hev_thresh);
    }

    H_LOOP_FILTER_16Y_INNER(vp7)
}
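
/* Simple loop filter: luma only, no high-edge-variance check, using the
 * same mbedge/bedge limits as the normal filter's VP8 path. */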
static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;

    if (!filter_level)
        return;

    bedge_lim  = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
    }
    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
    }
}
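
/* Motion vectors are clamped to the frame area plus a 16-pixel margin;
 * one macroblock corresponds to 64 units (16 << 2) in the clamping ranges
 * set up below. */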
#define MARGIN (16 << 2)
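
/* Decode macroblock modes and motion vectors for the whole frame in a
 * separate pass over the full-frame macroblock layout (mb_layout == 1),
 * presumably so that frame-threaded decoding can publish this data before
 * the pixel rows are reconstructed. */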
static av_always_inline
void decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
                        VP8Frame *prev_frame, int vp7)
{
    VP8Context *s = avctx->priv_data;
    int mb_x, mb_y;

    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
        int mb_xy = mb_y*s->mb_width;

        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);

        s->mv_min.x = -MARGIN;
        s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            if (mb_y == 0)
                AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 1, vp7);
            s->mv_min.x -= 64;
            s->mv_max.x -= 64;
        }
        s->mv_min.y -= 64;
        s->mv_max.y -= 64;
    }
}

#if CONFIG_VP7_DECODER
static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
                                   VP8Frame *prev_frame)
{
    decode_mv_mb_modes(avctx, curframe, prev_frame, 1);
}
#endif

#if CONFIG_VP8_DECODER
static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
                                   VP8Frame *prev_frame)
{
    decode_mv_mb_modes(avctx, curframe, prev_frame, 0);
}
#endif
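
/* Sliced-threading synchronization: a thread publishes its progress as
 * (mb_y << 16) | mb_x in td->thread_mb_pos.  check_thread_pos() blocks
 * until the other thread (otd) has passed the given position, and
 * update_pos() publishes a new position and wakes any waiter.  A row's
 * filtering progress is reported at mb_x offsets shifted by
 * s->mb_width + 3, so decoding and filtering share the same counter. */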
#if HAVE_THREADS
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
    do {\
        int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);\
        if (otd->thread_mb_pos < tmp) {\
            pthread_mutex_lock(&otd->lock);\
            td->wait_mb_pos = tmp;\
            do {\
                if (otd->thread_mb_pos >= tmp)\
                    break;\
                pthread_cond_wait(&otd->cond, &otd->lock);\
            } while (1);\
            td->wait_mb_pos = INT_MAX;\
            pthread_mutex_unlock(&otd->lock);\
        }\
    } while (0);

#define update_pos(td, mb_y, mb_x)\
    do {\
        int pos              = (mb_y << 16) | (mb_x & 0xFFFF);\
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
        int is_null          = (next_td == NULL) || (prev_td == NULL);\
        int pos_check        = (is_null) ? 1 :\
                               (next_td != td && pos >= next_td->wait_mb_pos) ||\
                               (prev_td != td && pos >= prev_td->wait_mb_pos);\
        td->thread_mb_pos = pos;\
        if (sliced_threading && pos_check) {\
            pthread_mutex_lock(&td->lock);\
            pthread_cond_broadcast(&td->cond);\
            pthread_mutex_unlock(&td->lock);\
        }\
    } while (0);
#else
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)
#define update_pos(td, mb_y, mb_x)
#endif
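
/* Decode and reconstruct (but do not loop-filter) one row of macroblocks.
 * Residual coefficients come from the coefficient partition selected by
 * mb_y, which is what allows several rows to be decoded in parallel. */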
static av_always_inline void decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                                     int jobnr, int threadnr, int vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
    int mb_y = td->thread_mb_pos>>16;
    int mb_x, mb_xy = mb_y*s->mb_width;
    int num_jobs = s->num_jobs;
    VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
    VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->tf.f->data[0] + 16*mb_y*s->linesize,
        curframe->tf.f->data[1] +  8*mb_y*s->uvlinesize,
        curframe->tf.f->data[2] +  8*mb_y*s->uvlinesize
    };
    if (mb_y == 0)              prev_td = td;
    else                        prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    if (mb_y == s->mb_height-1) next_td = td;
    else                        next_td = &s->thread_data[(jobnr + 1)%num_jobs];
    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
    else {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
    }

    if (!vp7 || mb_y == 0)
        memset(td->left_nnz, 0, sizeof(td->left_nnz));

    s->mv_min.x = -MARGIN;
    s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        // Wait for previous thread to read mb_x+2, and reach mb_y-1.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td, mb_x+(vp7?2:1), mb_y-(vp7?2:1));
            } else {
                check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+(vp7?2:1)), mb_y-(vp7?2:1));
            }
        }

        s->vdsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
        s->vdsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);

        if (!s->mb_layout)
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 0, vp7);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, vp7);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y, vp7);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (!mb->skip) {
            idct_mb(s, td, dst, mb);
        } else {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
            // Reset DC block predictors if they would exist if the mb had coefficients
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8]     = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x], vp7);

        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        s->mv_min.x -= 64;
        s->mv_max.x -= 64;

        if (mb_x == s->mb_width+1) {
            update_pos(td, mb_y, s->mb_width+3);
        } else {
            update_pos(td, mb_y, mb_x);
        }
    }
}

#if CONFIG_VP7_DECODER
static void vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
}
#endif

#if CONFIG_VP8_DECODER
static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
}
#endif
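
/* Loop-filter one row of macroblocks that decode_mb_row_no_filter() has
 * already reconstructed.  Under slice threading the check_thread_pos()
 * waits keep this row's filtering behind the filtering of the row above
 * and behind the reconstruction of the row below. */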
static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
                                           int jobnr, int threadnr, int vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe->tf.f;
    VP8Macroblock *mb;
    VP8ThreadData *prev_td, *next_td;
    uint8_t *dst[3] = {
        curframe->data[0] + 16*mb_y*s->linesize,
        curframe->data[1] +  8*mb_y*s->uvlinesize,
        curframe->data[2] +  8*mb_y*s->uvlinesize
    };

    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
    else
        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;

    if (mb_y == 0)              prev_td = td;
    else                        prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    if (mb_y == s->mb_height-1) next_td = td;
    else                        next_td = &s->thread_data[(jobnr + 1)%num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];
        if (prev_td != td) {
            check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
        }
        if (next_td != &s->thread_data[0]) {
            check_thread_pos(td, next_td, mb_x+1, mb_y+1);
        }

        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        else
            filter_mb(s, dst, f, mb_x, mb_y, vp7);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width+3) + mb_x);
    }
}

#if CONFIG_VP7_DECODER
static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
}
#endif

#if CONFIG_VP8_DECODER
static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
}
#endif
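
/* Slice-threading worker: thread jobnr handles rows jobnr, jobnr+num_jobs,
 * ...; each row is reconstructed and, if enabled, loop-filtered before the
 * thread moves on to its next row. */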
static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[jobnr];
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
    VP8Frame *curframe = s->curframe;
    int mb_y, num_jobs = s->num_jobs;
    td->thread_nr = threadnr;
    for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
        if (mb_y >= s->mb_height) break;
        td->thread_mb_pos = mb_y<<16;
        s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
        if (s->deblock_filter)
            s->filter_mb_row(avctx, tdata, jobnr, threadnr);
        update_pos(td, mb_y, INT_MAX & 0xFFFF);

        s->mv_min.y -= 64;
        s->mv_max.y -= 64;

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            ff_thread_report_progress(&curframe->tf, mb_y, 0);
    }

    return 0;
}
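
/* Top-level frame decoding: parse the frame header, pick the buffers to
 * decode into and to reference, decode all macroblock rows (possibly in
 * parallel) and finally rotate the frame pointers for the next call. */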
int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                        AVPacket *avpkt)
{
    VP8Context *s = avctx->priv_data;
    int ret, i, referenced, num_jobs;
    enum AVDiscard skip_thresh;
    VP8Frame *av_uninit(curframe), *prev_frame;

    if ((ret = VPX(s->vp7, decode_frame_header)(s, avpkt->data, avpkt->size)) < 0)
        goto err;

    prev_frame = s->framep[VP56_FRAME_CURRENT];

    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
                                || s->update_altref == VP56_FRAME_CURRENT;

    skip_thresh = !referenced ? AVDISCARD_NONREF :
                     !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
        goto skip_decode;
    }
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // release no longer referenced frames
    for (i = 0; i < 5; i++)
        if (s->frames[i].tf.f->data[0] &&
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            vp8_release_frame(s, &s->frames[i]);

    curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);

    // Given that arithmetic probabilities are updated every frame, it's quite likely
    // that the values we have on a random interframe are complete junk if we didn't
    // start decode on a keyframe. So just don't display anything rather than junk.
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN] ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
        goto err;
    }

    curframe->tf.f->key_frame = s->keyframe;
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
    if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
        goto err;

    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
    }
    if (s->update_golden != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
    }
    if (s->update_last) {
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    } else {
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
    }
    s->next_framep[VP56_FRAME_CURRENT] = curframe;

    ff_thread_finish_setup(avctx);

    s->linesize   = curframe->tf.f->linesize[0];
    s->uvlinesize = curframe->tf.f->linesize[1];

    memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
    /* Zero macroblock structures for top/top-left prediction from outside the frame. */
    if (!s->mb_layout)
        memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
    if (!s->mb_layout && s->keyframe)
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);

    memset(s->ref_count, 0, sizeof(s->ref_count));

    if (s->mb_layout == 1) {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, 1, 0);
        VPX(s->vp7, decode_mv_mb_modes)(avctx, curframe, prev_frame);
    }

    if (avctx->active_thread_type == FF_THREAD_FRAME)
        num_jobs = 1;
    else
        num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
    s->num_jobs   = num_jobs;
    s->curframe   = curframe;
    s->prev_frame = prev_frame;
    s->mv_min.y   = -MARGIN;
    s->mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].thread_mb_pos = 0;
        s->thread_data[i].wait_mb_pos   = INT_MAX;
    }
    avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);

    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
        if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
            return ret;
        *got_frame = 1;
    }

    return avpkt->size;
err:
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
    return ret;
}

av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int i;

    vp8_decode_flush_impl(avctx, 1);
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        av_frame_free(&s->frames[i].tf.f);
    return 0;
}
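
/* Allocate the AVFrame containers up front; the actual image buffers are
 * requested per decoded frame in vp8_alloc_frame(). */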
static av_cold int vp8_init_frames(VP8Context *s)
{
    int i;
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
        s->frames[i].tf.f = av_frame_alloc();
        if (!s->frames[i].tf.f)
            return AVERROR(ENOMEM);
    }
    return 0;
}

av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;
    s->vp7   = avctx->codec->id == AV_CODEC_ID_VP7;
    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
    avctx->internal->allocate_progress = 1;

    ff_videodsp_init(&s->vdsp, 8);
    ff_h264_pred_init(&s->hpc, s->vp7 ? AV_CODEC_ID_VP7 : AV_CODEC_ID_VP8, 8, 1);
    ff_vp8dsp_init(&s->vp8dsp, s->vp7);

    s->decode_mb_row_no_filter = VPX(s->vp7, decode_mb_row_no_filter);
    s->filter_mb_row           = VPX(s->vp7, filter_mb_row);

    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
        return ret;
    }
    return 0;
}

#if CONFIG_VP8_DECODER
static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
        return ret;
    }
    return 0;
}
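
/* REBASE translates a frame pointer from the source thread's context into
 * the corresponding entry of this thread's own frames[] array;
 * vp8_decode_update_thread_context() copies the inter-frame persistent
 * state (probabilities, segmentation, loop-filter deltas, sign biases and
 * reference frames) from the source decoding thread. */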
#define REBASE(pic) \
    pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL

static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
    int i;

    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        free_buffers(s);
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta     = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
        if (s_src->frames[i].tf.f->data[0]) {
            int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
            if (ret < 0)
                return ret;
        }
    }

    s->framep[0] = REBASE(s_src->next_framep[0]);
    s->framep[1] = REBASE(s_src->next_framep[1]);
    s->framep[2] = REBASE(s_src->next_framep[2]);
    s->framep[3] = REBASE(s_src->next_framep[3]);

    return 0;
}
#endif /* CONFIG_VP8_DECODER */

#if CONFIG_VP7_DECODER
AVCodec ff_vp7_decoder = {
    .name                  = "vp7",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP7"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP7,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = ff_vp8_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = ff_vp8_decode_frame,
    .capabilities          = CODEC_CAP_DR1,
    .flush                 = vp8_decode_flush,
};
#endif /* CONFIG_VP7_DECODER */

#if CONFIG_VP8_DECODER
AVCodec ff_vp8_decoder = {
    .name                  = "vp8",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP8,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = ff_vp8_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = ff_vp8_decode_frame,
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
    .flush                 = vp8_decode_flush,
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
};
#endif /* CONFIG_VP8_DECODER */