2 * VP7/VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Fiona Glaser
7 * Copyright (C) 2012 Daniel Kang
8 * Copyright (C) 2014 Peter Ross
10 * This file is part of FFmpeg.
12 * FFmpeg is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "libavutil/imgutils.h"
32 #include "rectangle.h"
41 #if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
42 #define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
43 #elif CONFIG_VP7_DECODER
44 #define VPX(vp7, f) vp7_ ## f
45 #else // CONFIG_VP8_DECODER
46 #define VPX(vp7, f) vp8_ ## f
49 static void free_buffers(VP8Context *s)
53 for (i = 0; i < MAX_THREADS; i++) {
55 pthread_cond_destroy(&s->thread_data[i].cond);
56 pthread_mutex_destroy(&s->thread_data[i].lock);
58 av_freep(&s->thread_data[i].filter_strength);
60 av_freep(&s->thread_data);
61 av_freep(&s->macroblocks_base);
62 av_freep(&s->intra4x4_pred_mode_top);
63 av_freep(&s->top_nnz);
64 av_freep(&s->top_border);
66 s->macroblocks = NULL;
69 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
72 if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
73 ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
75 if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
76 ff_thread_release_buffer(s->avctx, &f->tf);
77 return AVERROR(ENOMEM);
82 static void vp8_release_frame(VP8Context *s, VP8Frame *f)
84 av_buffer_unref(&f->seg_map);
85 ff_thread_release_buffer(s->avctx, &f->tf);
88 #if CONFIG_VP8_DECODER
89 static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
93 vp8_release_frame(s, dst);
95 if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
98 !(dst->seg_map = av_buffer_ref(src->seg_map))) {
99 vp8_release_frame(s, dst);
100 return AVERROR(ENOMEM);
105 #endif /* CONFIG_VP8_DECODER */
107 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
109 VP8Context *s = avctx->priv_data;
112 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
113 vp8_release_frame(s, &s->frames[i]);
114 memset(s->framep, 0, sizeof(s->framep));
120 static void vp8_decode_flush(AVCodecContext *avctx)
122 vp8_decode_flush_impl(avctx, 0);
125 static VP8Frame *vp8_find_free_buffer(VP8Context *s)
127 VP8Frame *frame = NULL;
130 // find a free buffer
131 for (i = 0; i < 5; i++)
132 if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
133 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
134 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
135 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
136 frame = &s->frames[i];
140 av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
143 if (frame->tf.f->data[0])
144 vp8_release_frame(s, frame);
149 static av_always_inline
150 int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
152 AVCodecContext *avctx = s->avctx;
155 if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
156 height != s->avctx->height) {
157 vp8_decode_flush_impl(s->avctx, 1);
159 ret = ff_set_dimensions(s->avctx, width, height);
164 s->mb_width = (s->avctx->coded_width + 15) / 16;
165 s->mb_height = (s->avctx->coded_height + 15) / 16;
167 s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
168 avctx->thread_count > 1;
169 if (!s->mb_layout) { // Frame threading and one thread
170 s->macroblocks_base = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
171 sizeof(*s->macroblocks));
172 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
173 } else // Sliced threading
174 s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
175 sizeof(*s->macroblocks));
176 s->top_nnz = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
177 s->top_border = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
178 s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));
180 if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
181 !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
183 return AVERROR(ENOMEM);
186 for (i = 0; i < MAX_THREADS; i++) {
187 s->thread_data[i].filter_strength =
188 av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
189 if (!s->thread_data[i].filter_strength) {
191 return AVERROR(ENOMEM);
194 pthread_mutex_init(&s->thread_data[i].lock, NULL);
195 pthread_cond_init(&s->thread_data[i].cond, NULL);
199 s->macroblocks = s->macroblocks_base + 1;
204 static int vp7_update_dimensions(VP8Context *s, int width, int height)
206 return update_dimensions(s, width, height, IS_VP7);
209 static int vp8_update_dimensions(VP8Context *s, int width, int height)
211 return update_dimensions(s, width, height, IS_VP8);
215 static void parse_segment_info(VP8Context *s)
217 VP56RangeCoder *c = &s->c;
220 s->segmentation.update_map = vp8_rac_get(c);
222 if (vp8_rac_get(c)) { // update segment feature data
223 s->segmentation.absolute_vals = vp8_rac_get(c);
225 for (i = 0; i < 4; i++)
226 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
228 for (i = 0; i < 4; i++)
229 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
231 if (s->segmentation.update_map)
232 for (i = 0; i < 3; i++)
233 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
236 static void update_lf_deltas(VP8Context *s)
238 VP56RangeCoder *c = &s->c;
241 for (i = 0; i < 4; i++) {
242 if (vp8_rac_get(c)) {
243 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
246 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
250 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
251 if (vp8_rac_get(c)) {
252 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
255 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
260 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
262 const uint8_t *sizes = buf;
266 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
268 buf += 3 * (s->num_coeff_partitions - 1);
269 buf_size -= 3 * (s->num_coeff_partitions - 1);
273 for (i = 0; i < s->num_coeff_partitions - 1; i++) {
274 int size = AV_RL24(sizes + 3 * i);
275 if (buf_size - size < 0)
278 ret = ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
284 return ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
287 static void vp7_get_quants(VP8Context *s)
289 VP56RangeCoder *c = &s->c;
291 int yac_qi = vp8_rac_get_uint(c, 7);
292 int ydc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
293 int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
294 int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
295 int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
296 int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
298 s->qmat[0].luma_qmul[0] = vp7_ydc_qlookup[ydc_qi];
299 s->qmat[0].luma_qmul[1] = vp7_yac_qlookup[yac_qi];
300 s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
301 s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
302 s->qmat[0].chroma_qmul[0] = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
303 s->qmat[0].chroma_qmul[1] = vp7_yac_qlookup[uvac_qi];
306 static void vp8_get_quants(VP8Context *s)
308 VP56RangeCoder *c = &s->c;
311 int yac_qi = vp8_rac_get_uint(c, 7);
312 int ydc_delta = vp8_rac_get_sint(c, 4);
313 int y2dc_delta = vp8_rac_get_sint(c, 4);
314 int y2ac_delta = vp8_rac_get_sint(c, 4);
315 int uvdc_delta = vp8_rac_get_sint(c, 4);
316 int uvac_delta = vp8_rac_get_sint(c, 4);
318 for (i = 0; i < 4; i++) {
319 if (s->segmentation.enabled) {
320 base_qi = s->segmentation.base_quant[i];
321 if (!s->segmentation.absolute_vals)
326 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta, 7)];
327 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)];
328 s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)] * 2;
329 /* 101581>>16 is equivalent to 155/100 */
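        /* illustrative check, not from the bitstream spec: 1.55 * 65536 = 101580.8,
         * so e.g. an AC factor of 60 maps to (60 * 101581) >> 16 = 93 = 60 * 155 / 100 */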
330 s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] * 101581 >> 16;
331 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
332 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
334 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
335 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
340 * Determine which buffers golden and altref should be updated with after this frame.
341 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
343 * Intra frames update all 3 references
344 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
345 * If the update (golden|altref) flag is set, it's updated with the current frame
346 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
347 * If the flag is not set, the number read means:
349 * 1: VP56_FRAME_PREVIOUS
350 * 2: update golden with altref, or update altref with golden
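 *
 * (Worked reading of the switch below: if the update flag is set the helper
 * returns VP56_FRAME_CURRENT; otherwise a 2-bit value of 1 returns
 * VP56_FRAME_PREVIOUS, 2 swaps in the other reference (altref for golden and
 * vice versa), and anything else returns VP56_FRAME_NONE, i.e. no update.)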
352 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
354 VP56RangeCoder *c = &s->c;
357 return VP56_FRAME_CURRENT;
359 switch (vp8_rac_get_uint(c, 2)) {
361 return VP56_FRAME_PREVIOUS;
363 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
365 return VP56_FRAME_NONE;
368 static void vp78_reset_probability_tables(VP8Context *s)
371 for (i = 0; i < 4; i++)
372 for (j = 0; j < 16; j++)
373 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
374 sizeof(s->prob->token[i][j]));
377 static void vp78_update_probability_tables(VP8Context *s)
379 VP56RangeCoder *c = &s->c;
382 for (i = 0; i < 4; i++)
383 for (j = 0; j < 8; j++)
384 for (k = 0; k < 3; k++)
385 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
386 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
387 int prob = vp8_rac_get_uint(c, 8);
388 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
389 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
393 #define VP7_MVC_SIZE 17
394 #define VP8_MVC_SIZE 19
396 static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
399 VP56RangeCoder *c = &s->c;
403 for (i = 0; i < 4; i++)
404 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
406 for (i = 0; i < 3; i++)
407 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
409 // 17.2 MV probability update
410 for (i = 0; i < 2; i++)
411 for (j = 0; j < mvc_size; j++)
412 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
413 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
416 static void update_refs(VP8Context *s)
418 VP56RangeCoder *c = &s->c;
420 int update_golden = vp8_rac_get(c);
421 int update_altref = vp8_rac_get(c);
423 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
424 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
427 static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
431 for (j = 1; j < 3; j++) {
432 for (i = 0; i < height / 2; i++)
433 memcpy(dst->data[j] + i * dst->linesize[j],
434 src->data[j] + i * src->linesize[j], width / 2);
438 static void fade(uint8_t *dst, ptrdiff_t dst_linesize,
439 const uint8_t *src, ptrdiff_t src_linesize,
440 int width, int height,
444 for (j = 0; j < height; j++) {
445 for (i = 0; i < width; i++) {
446 uint8_t y = src[j * src_linesize + i];
447 dst[j * dst_linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
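            /* illustrative numbers: y = 128 with beta = -64 and alpha = -10 gives
             * 128 + ((128 * -64) >> 8) - 10 = 128 - 32 - 10 = 86 */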
452 static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
454 int alpha = (int8_t) vp8_rac_get_uint(c, 8);
455 int beta = (int8_t) vp8_rac_get_uint(c, 8);
458 if (!s->keyframe && (alpha || beta)) {
459 int width = s->mb_width * 16;
460 int height = s->mb_height * 16;
463 if (!s->framep[VP56_FRAME_PREVIOUS] ||
464 !s->framep[VP56_FRAME_GOLDEN]) {
465 av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
466 return AVERROR_INVALIDDATA;
470 src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
472 /* preserve the golden frame, write a new previous frame */
473 if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
474 s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
475 if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
478 dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
480 copy_chroma(dst, src, width, height);
483 fade(dst->data[0], dst->linesize[0],
484 src->data[0], src->linesize[0],
485 width, height, alpha, beta);
491 static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
493 VP56RangeCoder *c = &s->c;
494 int part1_size, hscale, vscale, i, j, ret;
495 int width = s->avctx->width;
496 int height = s->avctx->height;
499 return AVERROR_INVALIDDATA;
502 s->profile = (buf[0] >> 1) & 7;
503 if (s->profile > 1) {
504 avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
505 return AVERROR_INVALIDDATA;
508 s->keyframe = !(buf[0] & 1);
510 part1_size = AV_RL24(buf) >> 4;
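    /* e.g. (illustrative bytes, not a real stream): a tag of 0x10 0x02 0x00 reads as
     * keyframe (bit 0 clear), profile 0 (bits 1-3), part1_size = 0x210 >> 4 = 33 bytes */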
512 if (buf_size < 4 - s->profile + part1_size) {
513 av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed: %d\n", buf_size, 4 - s->profile + part1_size);
514 return AVERROR_INVALIDDATA;
517 buf += 4 - s->profile;
518 buf_size -= 4 - s->profile;
520 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
522 ret = ff_vp56_init_range_decoder(c, buf, part1_size);
526 buf_size -= part1_size;
528 /* A. Dimension information (keyframes only) */
530 width = vp8_rac_get_uint(c, 12);
531 height = vp8_rac_get_uint(c, 12);
532 hscale = vp8_rac_get_uint(c, 2);
533 vscale = vp8_rac_get_uint(c, 2);
534 if (hscale || vscale)
535 avpriv_request_sample(s->avctx, "Upscaling");
537 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
538 vp78_reset_probability_tables(s);
539 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
540 sizeof(s->prob->pred16x16));
541 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
542 sizeof(s->prob->pred8x8c));
543 for (i = 0; i < 2; i++)
544 memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
545 sizeof(vp7_mv_default_prob[i]));
546 memset(&s->segmentation, 0, sizeof(s->segmentation));
547 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
548 memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
551 if (s->keyframe || s->profile > 0)
552 memset(s->inter_dc_pred, 0, sizeof(s->inter_dc_pred));
554 /* B. Decoding information for all four macroblock-level features */
555 for (i = 0; i < 4; i++) {
556 s->feature_enabled[i] = vp8_rac_get(c);
557 if (s->feature_enabled[i]) {
558 s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);
560 for (j = 0; j < 3; j++)
561 s->feature_index_prob[i][j] =
562 vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
564 if (vp7_feature_value_size[s->profile][i])
565 for (j = 0; j < 4; j++)
566 s->feature_value[i][j] =
567 vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
571 s->segmentation.enabled = 0;
572 s->segmentation.update_map = 0;
573 s->lf_delta.enabled = 0;
575 s->num_coeff_partitions = 1;
576 ret = ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
580 if (!s->macroblocks_base || /* first frame */
581 width != s->avctx->width || height != s->avctx->height ||
582 (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
583 if ((ret = vp7_update_dimensions(s, width, height)) < 0)
587 /* C. Dequantization indices */
590 /* D. Golden frame update flag (a Flag) for interframes only */
592 s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
593 s->sign_bias[VP56_FRAME_GOLDEN] = 0;
597 s->update_probabilities = 1;
600 if (s->profile > 0) {
601 s->update_probabilities = vp8_rac_get(c);
602 if (!s->update_probabilities)
603 s->prob[1] = s->prob[0];
606 s->fade_present = vp8_rac_get(c);
609 /* E. Fading information for previous frame */
610 if (s->fade_present && vp8_rac_get(c)) {
611 if ((ret = vp7_fade_frame(s, c)) < 0)
615 /* F. Loop filter type */
617 s->filter.simple = vp8_rac_get(c);
619 /* G. DCT coefficient ordering specification */
621 for (i = 1; i < 16; i++)
622 s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)];
624 /* H. Loop filter levels */
626 s->filter.simple = vp8_rac_get(c);
627 s->filter.level = vp8_rac_get_uint(c, 6);
628 s->filter.sharpness = vp8_rac_get_uint(c, 3);
630 /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
631 vp78_update_probability_tables(s);
633 s->mbskip_enabled = 0;
635 /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
637 s->prob->intra = vp8_rac_get_uint(c, 8);
638 s->prob->last = vp8_rac_get_uint(c, 8);
639 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
645 static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
647 VP56RangeCoder *c = &s->c;
648 int header_size, hscale, vscale, ret;
649 int width = s->avctx->width;
650 int height = s->avctx->height;
653 av_log(s->avctx, AV_LOG_ERROR, "Insufficient data (%d) for header\n", buf_size);
654 return AVERROR_INVALIDDATA;
657 s->keyframe = !(buf[0] & 1);
658 s->profile = (buf[0]>>1) & 7;
659 s->invisible = !(buf[0] & 0x10);
660 header_size = AV_RL24(buf) >> 5;
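    /* e.g. (illustrative bytes): buf[0..2] = 0x50 0x06 0x00 decodes as keyframe
     * (bit 0 clear), profile 0 (bits 1-3), visible (bit 4 set), and a first
     * partition size of 0x650 >> 5 = 50 bytes */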
665 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
668 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
669 sizeof(s->put_pixels_tab));
670 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
671 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
672 sizeof(s->put_pixels_tab));
674 if (header_size > buf_size - 7 * s->keyframe) {
675 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
676 return AVERROR_INVALIDDATA;
680 if (AV_RL24(buf) != 0x2a019d) {
681 av_log(s->avctx, AV_LOG_ERROR,
682 "Invalid start code 0x%x\n", AV_RL24(buf));
683 return AVERROR_INVALIDDATA;
685 width = AV_RL16(buf + 3) & 0x3fff;
686 height = AV_RL16(buf + 5) & 0x3fff;
687 hscale = buf[4] >> 6;
688 vscale = buf[6] >> 6;
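    /* e.g. (illustrative bytes): 9d 01 2a 80 02 e0 01 is the keyframe start code
     * followed by width 0x0280 = 640, height 0x01e0 = 480, hscale = vscale = 0 */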
692 if (hscale || vscale)
693 avpriv_request_sample(s->avctx, "Upscaling");
695 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
696 vp78_reset_probability_tables(s);
697 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
698 sizeof(s->prob->pred16x16));
699 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
700 sizeof(s->prob->pred8x8c));
701 memcpy(s->prob->mvc, vp8_mv_default_prob,
702 sizeof(s->prob->mvc));
703 memset(&s->segmentation, 0, sizeof(s->segmentation));
704 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
707 ret = ff_vp56_init_range_decoder(c, buf, header_size);
711 buf_size -= header_size;
714 s->colorspace = vp8_rac_get(c);
716 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
717 s->fullrange = vp8_rac_get(c);
720 if ((s->segmentation.enabled = vp8_rac_get(c)))
721 parse_segment_info(s);
723 s->segmentation.update_map = 0; // FIXME: move this to some init function?
725 s->filter.simple = vp8_rac_get(c);
726 s->filter.level = vp8_rac_get_uint(c, 6);
727 s->filter.sharpness = vp8_rac_get_uint(c, 3);
729 if ((s->lf_delta.enabled = vp8_rac_get(c)))
733 if (setup_partitions(s, buf, buf_size)) {
734 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
735 return AVERROR_INVALIDDATA;
738 if (!s->macroblocks_base || /* first frame */
739 width != s->avctx->width || height != s->avctx->height ||
740 (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
741 if ((ret = vp8_update_dimensions(s, width, height)) < 0)
748 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
749 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
752 // if we aren't saving this frame's probabilities for future frames,
753 // make a copy of the current probabilities
754 if (!(s->update_probabilities = vp8_rac_get(c)))
755 s->prob[1] = s->prob[0];
757 s->update_last = s->keyframe || vp8_rac_get(c);
759 vp78_update_probability_tables(s);
761 if ((s->mbskip_enabled = vp8_rac_get(c)))
762 s->prob->mbskip = vp8_rac_get_uint(c, 8);
765 s->prob->intra = vp8_rac_get_uint(c, 8);
766 s->prob->last = vp8_rac_get_uint(c, 8);
767 s->prob->golden = vp8_rac_get_uint(c, 8);
768 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
774 static av_always_inline
775 void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
777 dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
778 av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
779 dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
780 av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));
784 * Motion vector coding, 17.1.
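 *
 * Layout of the probability array p[] as used below (read from the code, not
 * restated from the spec): p[0] selects long vs. short coding, p[1] is the
 * sign, p[2..8] drive the short-magnitude tree, and p[9..] give the per-bit
 * probabilities of the long form (8 magnitude bits for VP7, 10 for VP8).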
786 static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
790 if (vp56_rac_get_prob_branchy(c, p[0])) {
793 for (i = 0; i < 3; i++)
794 x += vp56_rac_get_prob(c, p[9 + i]) << i;
795 for (i = (vp7 ? 7 : 9); i > 3; i--)
796 x += vp56_rac_get_prob(c, p[9 + i]) << i;
797 if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
801 const uint8_t *ps = p + 2;
802 bit = vp56_rac_get_prob(c, *ps);
805 bit = vp56_rac_get_prob(c, *ps);
808 x += vp56_rac_get_prob(c, *ps);
811 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
814 static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
816 return read_mv_component(c, p, 1);
819 static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
821 return read_mv_component(c, p, 0);
824 static av_always_inline
825 const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
828 return vp7_submv_prob;
831 return vp8_submv_prob[4 - !!left];
833 return vp8_submv_prob[2];
834 return vp8_submv_prob[1 - !!left];
838 * Split motion vector prediction, 16.4.
839 * @returns the number of motion vectors parsed (2, 4 or 16)
841 static av_always_inline
842 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
843 int layout, int is_vp7)
847 VP8Macroblock *top_mb;
848 VP8Macroblock *left_mb = &mb[-1];
849 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
850 const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
852 VP56mv *left_mv = left_mb->bmv;
853 VP56mv *cur_mv = mb->bmv;
855 if (!layout) // layout is inlined, s->mb_layout is not
858 top_mb = &mb[-s->mb_width - 1];
859 mbsplits_top = vp8_mbsplits[top_mb->partitioning];
860 top_mv = top_mb->bmv;
862 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
863 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
864 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
866 part_idx = VP8_SPLITMVMODE_8x8;
868 part_idx = VP8_SPLITMVMODE_4x4;
871 num = vp8_mbsplit_count[part_idx];
872 mbsplits_cur = vp8_mbsplits[part_idx];
873 firstidx = vp8_mbfirstidx[part_idx];
874 mb->partitioning = part_idx;
876 for (n = 0; n < num; n++) {
878 uint32_t left, above;
879 const uint8_t *submv_prob;
882 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
884 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
886 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
888 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
890 submv_prob = get_submv_prob(left, above, is_vp7);
892 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
893 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
894 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
895 mb->bmv[n].y = mb->mv.y +
896 read_mv_component(c, s->prob->mvc[0], is_vp7);
897 mb->bmv[n].x = mb->mv.x +
898 read_mv_component(c, s->prob->mvc[1], is_vp7);
900 AV_ZERO32(&mb->bmv[n]);
903 AV_WN32A(&mb->bmv[n], above);
906 AV_WN32A(&mb->bmv[n], left);
914 * The vp7 reference decoder uses a padding macroblock column (added to the right
915 * edge of the frame) to guard against illegal macroblock offsets. The
916 * algorithm has bugs that permit offsets to straddle the padding column.
917 * This function replicates those bugs.
919 * @param[out] edge_x macroblock x address
920 * @param[out] edge_y macroblock y address
922 * @return macroblock offset legal (boolean)
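 *
 * Illustrative example (made-up numbers): with mb_width 10 the virtual row is
 * 11 macroblocks wide and x == 10 is the padding column; mb_x 9, xoffset 1,
 * mb_y 2, yoffset 0 gives new = 2 * 11 + 9 + 1 = 32, and 32 % 11 == 10, so the
 * offset lands in the padding column and is rejected.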
924 static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
925 int xoffset, int yoffset, int boundary,
926 int *edge_x, int *edge_y)
928 int vwidth = mb_width + 1;
929 int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
930 if (new < boundary || new % vwidth == vwidth - 1)
932 *edge_y = new / vwidth;
933 *edge_x = new % vwidth;
937 static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
939 return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
942 static av_always_inline
943 void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
944 int mb_x, int mb_y, int layout)
946 VP8Macroblock *mb_edge[12];
947 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
948 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
951 uint8_t cnt[3] = { 0 };
952 VP56RangeCoder *c = &s->c;
955 AV_ZERO32(&near_mv[0]);
956 AV_ZERO32(&near_mv[1]);
957 AV_ZERO32(&near_mv[2]);
959 for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
960 const VP7MVPred * pred = &vp7_mv_pred[i];
963 if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
964 pred->yoffset, !s->profile, &edge_x, &edge_y)) {
965 VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
966 ? s->macroblocks_base + 1 + edge_x +
967 (s->mb_width + 1) * (edge_y + 1)
968 : s->macroblocks + edge_x +
969 (s->mb_height - edge_y - 1) * 2;
970 uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
972 if (AV_RN32A(&near_mv[CNT_NEAREST])) {
973 if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
975 } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
976 if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
980 AV_WN32A(&near_mv[CNT_NEAR], mv);
984 AV_WN32A(&near_mv[CNT_NEAREST], mv);
993 cnt[idx] += vp7_mv_pred[i].score;
996 mb->partitioning = VP8_SPLITMVMODE_NONE;
998 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
999 mb->mode = VP8_MVMODE_MV;
1001 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
1003 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
1005 if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
1006 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
1008 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR] ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
1010 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
1011 mb->mode = VP8_MVMODE_SPLIT;
1012 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
1014 mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
1015 mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
1016 mb->bmv[0] = mb->mv;
1019 mb->mv = near_mv[CNT_NEAR];
1020 mb->bmv[0] = mb->mv;
1023 mb->mv = near_mv[CNT_NEAREST];
1024 mb->bmv[0] = mb->mv;
1027 mb->mode = VP8_MVMODE_ZERO;
1029 mb->bmv[0] = mb->mv;
1033 static av_always_inline
1034 void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
1035 int mb_x, int mb_y, int layout)
1037 VP8Macroblock *mb_edge[3] = { 0 /* top */,
1040 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
1041 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1043 int cur_sign_bias = s->sign_bias[mb->ref_frame];
1044 int8_t *sign_bias = s->sign_bias;
1046 uint8_t cnt[4] = { 0 };
1047 VP56RangeCoder *c = &s->c;
1049 if (!layout) { // layout is inlined (s->mb_layout is not)
1050 mb_edge[0] = mb + 2;
1051 mb_edge[2] = mb + 1;
1053 mb_edge[0] = mb - s->mb_width - 1;
1054 mb_edge[2] = mb - s->mb_width - 2;
1057 AV_ZERO32(&near_mv[0]);
1058 AV_ZERO32(&near_mv[1]);
1059 AV_ZERO32(&near_mv[2]);
1061 /* Process MB on top, left and top-left */
1062 #define MV_EDGE_CHECK(n) \
1064 VP8Macroblock *edge = mb_edge[n]; \
1065 int edge_ref = edge->ref_frame; \
1066 if (edge_ref != VP56_FRAME_CURRENT) { \
1067 uint32_t mv = AV_RN32A(&edge->mv); \
1069 if (cur_sign_bias != sign_bias[edge_ref]) { \
1070 /* SWAR negate of the values in mv. */ \
1072 mv = ((mv & 0x7fff7fff) + \
1073 0x00010001) ^ (mv & 0x80008000); \
1075 if (!n || mv != AV_RN32A(&near_mv[idx])) \
1076 AV_WN32A(&near_mv[++idx], mv); \
1077 cnt[idx] += 1 + (n != 2); \
1079 cnt[CNT_ZERO] += 1 + (n != 2); \
1087 mb->partitioning = VP8_SPLITMVMODE_NONE;
1088 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
1089 mb->mode = VP8_MVMODE_MV;
1091 /* If we have three distinct MVs, merge first and last if they're the same */
1092 if (cnt[CNT_SPLITMV] &&
1093 AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
1094 cnt[CNT_NEAREST] += 1;
1096 /* Swap near and nearest if necessary */
1097 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
1098 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
1099 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
1102 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
1103 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
1104 /* Choose the best mv out of 0,0 and the nearest mv */
1105 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
1106 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
1107 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
1108 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
1110 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
1111 mb->mode = VP8_MVMODE_SPLIT;
1112 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
1114 mb->mv.y += vp8_read_mv_component(c, s->prob->mvc[0]);
1115 mb->mv.x += vp8_read_mv_component(c, s->prob->mvc[1]);
1116 mb->bmv[0] = mb->mv;
1119 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
1120 mb->bmv[0] = mb->mv;
1123 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
1124 mb->bmv[0] = mb->mv;
1127 mb->mode = VP8_MVMODE_ZERO;
1129 mb->bmv[0] = mb->mv;
1133 static av_always_inline
1134 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
1135 int mb_x, int keyframe, int layout)
1137 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1140 VP8Macroblock *mb_top = mb - s->mb_width - 1;
1141 memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
1146 uint8_t *const left = s->intra4x4_pred_mode_left;
1148 top = mb->intra4x4_pred_mode_top;
1150 top = s->intra4x4_pred_mode_top + 4 * mb_x;
1151 for (y = 0; y < 4; y++) {
1152 for (x = 0; x < 4; x++) {
1154 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
1155 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
1156 left[y] = top[x] = *intra4x4;
1162 for (i = 0; i < 16; i++)
1163 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
1164 vp8_pred4x4_prob_inter);
1168 static av_always_inline
1169 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1170 uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
1172 VP56RangeCoder *c = &s->c;
1173 static const char *vp7_feature_name[] = { "q-index",
1175 "partial-golden-update",
1180 for (i = 0; i < 4; i++) {
1181 if (s->feature_enabled[i]) {
1182 if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
1183 int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
1184 s->feature_index_prob[i]);
1185 av_log(s->avctx, AV_LOG_WARNING,
1186 "Feature %s present in macroblock (value 0x%x)\n",
1187 vp7_feature_name[i], s->feature_value[i][index]);
1191 } else if (s->segmentation.update_map) {
1192 int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
1193 *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
1194 } else if (s->segmentation.enabled)
1195 *segment = ref ? *ref : *segment;
1196 mb->segment = *segment;
1198 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
1201 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
1202 vp8_pred16x16_prob_intra);
1204 if (mb->mode == MODE_I4x4) {
1205 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
1207 const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
1208 : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
1210 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
1212 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
1213 AV_WN32A(s->intra4x4_pred_mode_left, modes);
1216 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1217 vp8_pred8x8c_prob_intra);
1218 mb->ref_frame = VP56_FRAME_CURRENT;
1219 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
1221 if (vp56_rac_get_prob_branchy(c, s->prob->last))
1223 (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
1224 : VP56_FRAME_GOLDEN;
1226 mb->ref_frame = VP56_FRAME_PREVIOUS;
1227 s->ref_count[mb->ref_frame - 1]++;
1229 // motion vectors, 16.3
1231 vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
1233 vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
1236 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
1238 if (mb->mode == MODE_I4x4)
1239 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
1241 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1243 mb->ref_frame = VP56_FRAME_CURRENT;
1244 mb->partitioning = VP8_SPLITMVMODE_NONE;
1245 AV_ZERO32(&mb->bmv[0]);
1250 * @param r arithmetic bitstream reader context
1251 * @param block destination for block coefficients
1252 * @param probs probabilities to use when reading trees from the bitstream
1253 * @param i initial coeff index, 0 unless a separate DC block is coded
1254 * @param qmul array holding the dc/ac dequant factor at position 0/1
1256 * @return 0 if no coeffs were decoded
1257 * otherwise, the index of the last coeff decoded plus one
1259 static av_always_inline
1260 int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
1261 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1262 int i, uint8_t *token_prob, int16_t qmul[2],
1263 const uint8_t scan[16], int vp7)
1265 VP56RangeCoder c = *r;
1270 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
1274 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
1276 break; // invalid input; blocks should end with EOB
1277 token_prob = probs[i][0];
1283 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
1285 token_prob = probs[i + 1][1];
1287 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
1288 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
1290 coeff += vp56_rac_get_prob(&c, token_prob[5]);
1294 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
1295 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
1296 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
1297 } else { // DCT_CAT2
1299 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
1300 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
1302 } else { // DCT_CAT3 and up
1303 int a = vp56_rac_get_prob(&c, token_prob[8]);
1304 int b = vp56_rac_get_prob(&c, token_prob[9 + a]);
1305 int cat = (a << 1) + b;
1306 coeff = 3 + (8 << cat);
1307 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
1310 token_prob = probs[i + 1][2];
1312 block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
1319 static av_always_inline
1320 int inter_predict_dc(int16_t block[16], int16_t pred[2])
1322 int16_t dc = block[0];
1330 if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1331 block[0] = pred[0] = dc;
1336 block[0] = pred[0] = dc;
1342 static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
1344 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1345 int i, uint8_t *token_prob,
1347 const uint8_t scan[16])
1349 return decode_block_coeffs_internal(r, block, probs, i,
1350 token_prob, qmul, scan, IS_VP7);
1353 #ifndef vp8_decode_block_coeffs_internal
1354 static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
1356 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1357 int i, uint8_t *token_prob,
1360 return decode_block_coeffs_internal(r, block, probs, i,
1361 token_prob, qmul, ff_zigzag_scan, IS_VP8);
1366 * @param c arithmetic bitstream reader context
1367 * @param block destination for block coefficients
1368 * @param probs probabilities to use when reading trees from the bitstream
1369 * @param i initial coeff index, 0 unless a separate DC block is coded
1370 * @param zero_nhood the initial prediction context for number of surrounding
1371 * all-zero blocks (only left/top, so 0-2)
1372 * @param qmul array holding the dc/ac dequant factor at position 0/1
1373 * @param scan scan pattern (VP7 only)
1375 * @return 0 if no coeffs were decoded
1376 * otherwise, the index of the last coeff decoded plus one
1378 static av_always_inline
1379 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
1380 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1381 int i, int zero_nhood, int16_t qmul[2],
1382 const uint8_t scan[16], int vp7)
1384 uint8_t *token_prob = probs[i][zero_nhood];
1385 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
1387 return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1388 token_prob, qmul, scan)
1389 : vp8_decode_block_coeffs_internal(c, block, probs, i,
1393 static av_always_inline
1394 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
1395 VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
1398 int i, x, y, luma_start = 0, luma_ctx = 3;
1399 int nnz_pred, nnz, nnz_total = 0;
1400 int segment = mb->segment;
1403 if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1404 nnz_pred = t_nnz[8] + l_nnz[8];
1406 // decode DC values and do hadamard
1407 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
1408 nnz_pred, s->qmat[segment].luma_dc_qmul,
1409 ff_zigzag_scan, is_vp7);
1410 l_nnz[8] = t_nnz[8] = !!nnz;
1412 if (is_vp7 && mb->mode > MODE_I4x4) {
1413 nnz |= inter_predict_dc(td->block_dc,
1414 s->inter_dc_pred[mb->ref_frame - 1]);
1421 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1423 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
1430 for (y = 0; y < 4; y++)
1431 for (x = 0; x < 4; x++) {
1432 nnz_pred = l_nnz[y] + t_nnz[x];
1433 nnz = decode_block_coeffs(c, td->block[y][x],
1434 s->prob->token[luma_ctx],
1435 luma_start, nnz_pred,
1436 s->qmat[segment].luma_qmul,
1437 s->prob[0].scan, is_vp7);
1438 /* nnz+block_dc may be one more than the actual last index,
1439 * but we don't care */
1440 td->non_zero_count_cache[y][x] = nnz + block_dc;
1441 t_nnz[x] = l_nnz[y] = !!nnz;
1446 // TODO: what to do about dimensions? 2nd dim for luma is x,
1447 // but for chroma it's (y<<1)|x
1448 for (i = 4; i < 6; i++)
1449 for (y = 0; y < 2; y++)
1450 for (x = 0; x < 2; x++) {
1451 nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
1452 nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
1453 s->prob->token[2], 0, nnz_pred,
1454 s->qmat[segment].chroma_qmul,
1455 s->prob[0].scan, is_vp7);
1456 td->non_zero_count_cache[i][(y << 1) + x] = nnz;
1457 t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
1461 // if there were no coded coeffs despite the macroblock not being marked skip,
1462 // we MUST not do the inner loop filter and should not do IDCT
1463 // Since skip isn't used for bitstream prediction, just manually set it.
1468 static av_always_inline
1469 void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1470 uint8_t *src_cb, uint8_t *src_cr,
1471 ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
1473 AV_COPY128(top_border, src_y + 15 * linesize);
1475 AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1476 AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
1480 static av_always_inline
1481 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
1482 uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
1483 int mb_y, int mb_width, int simple, int xchg)
1485 uint8_t *top_border_m1 = top_border - 32; // for TL prediction
1487 src_cb -= uvlinesize;
1488 src_cr -= uvlinesize;
1490 #define XCHG(a, b, xchg) \
1498 XCHG(top_border_m1 + 8, src_y - 8, xchg);
1499 XCHG(top_border, src_y, xchg);
1500 XCHG(top_border + 8, src_y + 8, 1);
1501 if (mb_x < mb_width - 1)
1502 XCHG(top_border + 32, src_y + 16, 1);
1504 // only copy chroma for normal loop filter
1505 // or to initialize the top row to 127
1506 if (!simple || !mb_y) {
1507 XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1508 XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1509 XCHG(top_border + 16, src_cb, 1);
1510 XCHG(top_border + 24, src_cr, 1);
1514 static av_always_inline
1515 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1518 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1520 return mb_y ? mode : LEFT_DC_PRED8x8;
1523 static av_always_inline
1524 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1527 return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1529 return mb_y ? mode : HOR_PRED8x8;
1532 static av_always_inline
1533 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1537 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1539 return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1541 return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1542 case PLANE_PRED8x8: /* TM */
1543 return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
1548 static av_always_inline
1549 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1552 return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1554 return mb_y ? mode : HOR_VP8_PRED;
1558 static av_always_inline
1559 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1560 int *copy_buf, int vp7)
1564 if (!mb_x && mb_y) {
1569 case DIAG_DOWN_LEFT_PRED:
1570 case VERT_LEFT_PRED:
1571 return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1579 return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1581 return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1582 case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1583 * as 16x16/8x8 DC */
1584 case DIAG_DOWN_RIGHT_PRED:
1585 case VERT_RIGHT_PRED:
1594 static av_always_inline
1595 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1596 VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
1598 int x, y, mode, nnz;
1601 /* for the first row, we need to run xchg_mb_border to init the top edge
1602 * to 127; otherwise, skip it if we aren't going to deblock */
1603 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1604 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1605 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1606 s->filter.simple, 1);
1608 if (mb->mode < MODE_I4x4) {
1609 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
1610 s->hpc.pred16x16[mode](dst[0], s->linesize);
1612 uint8_t *ptr = dst[0];
1613 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1614 const uint8_t lo = is_vp7 ? 128 : 127;
1615 const uint8_t hi = is_vp7 ? 128 : 129;
1616 uint8_t tr_top[4] = { lo, lo, lo, lo };
1618 // all blocks on the right edge of the macroblock use the bottom edge of
1619 // the top macroblock for their topright edge
1620 uint8_t *tr_right = ptr - s->linesize + 16;
1622 // if we're on the right edge of the frame, said edge is extended
1623 // from the top macroblock
1624 if (mb_y && mb_x == s->mb_width - 1) {
1625 tr = tr_right[-1] * 0x01010101u;
1626 tr_right = (uint8_t *) &tr;
1630 AV_ZERO128(td->non_zero_count_cache);
1632 for (y = 0; y < 4; y++) {
1633 uint8_t *topright = ptr + 4 - s->linesize;
1634 for (x = 0; x < 4; x++) {
1636 ptrdiff_t linesize = s->linesize;
1637 uint8_t *dst = ptr + 4 * x;
1638 LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);
1640 if ((y == 0 || x == 3) && mb_y == 0) {
1643 topright = tr_right;
1645 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
1646 mb_y + y, &copy, is_vp7);
1648 dst = copy_dst + 12;
1652 AV_WN32A(copy_dst + 4, lo * 0x01010101U);
1654 AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
1658 copy_dst[3] = ptr[4 * x - s->linesize - 1];
1667 copy_dst[11] = ptr[4 * x - 1];
1668 copy_dst[19] = ptr[4 * x + s->linesize - 1];
1669 copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
1670 copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
1673 s->hpc.pred4x4[mode](dst, topright, linesize);
1675 AV_COPY32(ptr + 4 * x, copy_dst + 12);
1676 AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20);
1677 AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
1678 AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
1681 nnz = td->non_zero_count_cache[y][x];
1684 s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
1685 td->block[y][x], s->linesize);
1687 s->vp8dsp.vp8_idct_add(ptr + 4 * x,
1688 td->block[y][x], s->linesize);
1693 ptr += 4 * s->linesize;
1698 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
1699 mb_x, mb_y, is_vp7);
1700 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1701 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1703 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1704 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1705 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1706 s->filter.simple, 0);
1709 static const uint8_t subpel_idx[3][8] = {
1710 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1711 // also function pointer index
1712 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1713 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
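/* e.g. a fractional position of 2 selects function-pointer index 2 and needs
 * 2 extra pixels to the left plus 3 to the right (5 in total) when the edge
 * emulation path is taken, while a position of 1 needs only 1 + 2 = 3. */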
1719 * @param s VP8 decoding context
1720 * @param dst target buffer for block data at block position
1721 * @param ref reference picture buffer at origin (0, 0)
1722 * @param mv motion vector (relative to block position) to get pixel data from
1723 * @param x_off horizontal position of block from origin (0, 0)
1724 * @param y_off vertical position of block from origin (0, 0)
1725 * @param block_w width of block (16, 8 or 4)
1726 * @param block_h height of block (always same as block_w)
1727 * @param width width of src/dst plane data
1728 * @param height height of src/dst plane data
1729 * @param linesize size of a single line of plane data, including padding
1730 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1732 static av_always_inline
1733 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1734 ThreadFrame *ref, const VP56mv *mv,
1735 int x_off, int y_off, int block_w, int block_h,
1736 int width, int height, ptrdiff_t linesize,
1737 vp8_mc_func mc_func[3][3])
1739 uint8_t *src = ref->f->data[0];
1742 ptrdiff_t src_linesize = linesize;
1744 int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
1745 int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];
1747 x_off += mv->x >> 2;
1748 y_off += mv->y >> 2;
1751 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1752 src += y_off * linesize + x_off;
1753 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1754 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1755 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1756 src - my_idx * linesize - mx_idx,
1757 EDGE_EMU_LINESIZE, linesize,
1758 block_w + subpel_idx[1][mx],
1759 block_h + subpel_idx[1][my],
1760 x_off - mx_idx, y_off - my_idx,
1762 src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1763 src_linesize = EDGE_EMU_LINESIZE;
1765 mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
1767 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1768 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1769 linesize, block_h, 0, 0);
1774 * chroma MC function
1776 * @param s VP8 decoding context
1777 * @param dst1 target buffer for block data at block position (U plane)
1778 * @param dst2 target buffer for block data at block position (V plane)
1779 * @param ref reference picture buffer at origin (0, 0)
1780 * @param mv motion vector (relative to block position) to get pixel data from
1781 * @param x_off horizontal position of block from origin (0, 0)
1782 * @param y_off vertical position of block from origin (0, 0)
1783 * @param block_w width of block (16, 8 or 4)
1784 * @param block_h height of block (always same as block_w)
1785 * @param width width of src/dst plane data
1786 * @param height height of src/dst plane data
1787 * @param linesize size of a single line of plane data, including padding
1788 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1790 static av_always_inline
1791 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1792 uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
1793 int x_off, int y_off, int block_w, int block_h,
1794 int width, int height, ptrdiff_t linesize,
1795 vp8_mc_func mc_func[3][3])
1797 uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1800 int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1801 int my = mv->y & 7, my_idx = subpel_idx[0][my];
1803 x_off += mv->x >> 3;
1804 y_off += mv->y >> 3;
1807 src1 += y_off * linesize + x_off;
1808 src2 += y_off * linesize + x_off;
1809 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1810 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1811 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1812 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1813 src1 - my_idx * linesize - mx_idx,
1814 EDGE_EMU_LINESIZE, linesize,
1815 block_w + subpel_idx[1][mx],
1816 block_h + subpel_idx[1][my],
1817 x_off - mx_idx, y_off - my_idx, width, height);
1818 src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1819 mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1821 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1822 src2 - my_idx * linesize - mx_idx,
1823 EDGE_EMU_LINESIZE, linesize,
1824 block_w + subpel_idx[1][mx],
1825 block_h + subpel_idx[1][my],
1826 x_off - mx_idx, y_off - my_idx, width, height);
1827 src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1828 mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1830 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1831 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1834 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1835 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1836 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1840 static av_always_inline
1841 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1842 ThreadFrame *ref_frame, int x_off, int y_off,
1843 int bx_off, int by_off, int block_w, int block_h,
1844 int width, int height, VP56mv *mv)
1849 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1850 ref_frame, mv, x_off + bx_off, y_off + by_off,
1851 block_w, block_h, width, height, s->linesize,
1852 s->put_pixels_tab[block_w == 8]);
1855 if (s->profile == 3) {
1856 /* this block only applies to VP8; it is safe to check
1857 * only the profile, as VP7 profile <= 1 */
1869 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1870 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1871 &uvmv, x_off + bx_off, y_off + by_off,
1872 block_w, block_h, width, height, s->uvlinesize,
1873 s->put_pixels_tab[1 + (block_w == 4)]);
1876 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1877 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1878 static av_always_inline
1879 void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1882 /* Don't prefetch refs that haven't been used very often this frame. */
1883 if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
1884 int x_off = mb_x << 4, y_off = mb_y << 4;
1885 int mx = (mb->mv.x >> 2) + x_off + 8;
1886 int my = (mb->mv.y >> 2) + y_off;
1887 uint8_t **src = s->framep[ref]->tf.f->data;
1888 int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1889 /* For threading, a ff_thread_await_progress here might be useful, but
1890 * it actually slows down the decoder. Since a bad prefetch doesn't
1891 * generate bad decoder output, we don't run it here. */
1892 s->vdsp.prefetch(src[0] + off, s->linesize, 4);
1893 off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1894 s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1899 * Apply motion vectors to prediction buffer, chapter 18.
1901 static av_always_inline
1902 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1903 VP8Macroblock *mb, int mb_x, int mb_y)
1905 int x_off = mb_x << 4, y_off = mb_y << 4;
1906 int width = 16 * s->mb_width, height = 16 * s->mb_height;
1907 ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1908 VP56mv *bmv = mb->bmv;
1910 switch (mb->partitioning) {
1911 case VP8_SPLITMVMODE_NONE:
1912 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1913 0, 0, 16, 16, width, height, &mb->mv);
1915 case VP8_SPLITMVMODE_4x4: {
1920 for (y = 0; y < 4; y++) {
1921 for (x = 0; x < 4; x++) {
1922 vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
1923 ref, &bmv[4 * y + x],
1924 4 * x + x_off, 4 * y + y_off, 4, 4,
1925 width, height, s->linesize,
1926 s->put_pixels_tab[2]);
1935 for (y = 0; y < 2; y++) {
1936 for (x = 0; x < 2; x++) {
1937 uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x +
1938 mb->bmv[2 * y * 4 + 2 * x + 1].x +
1939 mb->bmv[(2 * y + 1) * 4 + 2 * x ].x +
1940 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
1941 uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y +
1942 mb->bmv[2 * y * 4 + 2 * x + 1].y +
1943 mb->bmv[(2 * y + 1) * 4 + 2 * x ].y +
1944 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
1945 uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
1946 uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
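                /* rounds the summed sub-MVs to the nearest average with ties away
                 * from zero: e.g. a sum of 6 -> (6 + 2 + 0) >> 2 = 2 while a sum of
                 * -6 -> (-6 + 2 - 1) >> 2 = -2, FF_SIGNBIT() being -1 for negative
                 * values and 0 otherwise */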
1947 if (s->profile == 3) {
1951 vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
1952 dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
1953 &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
1954 width, height, s->uvlinesize,
1955 s->put_pixels_tab[2]);
1960 case VP8_SPLITMVMODE_16x8:
1961 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1962 0, 0, 16, 8, width, height, &bmv[0]);
1963 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1964 0, 8, 16, 8, width, height, &bmv[1]);
1966 case VP8_SPLITMVMODE_8x16:
1967 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1968 0, 0, 8, 16, width, height, &bmv[0]);
1969 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1970 8, 0, 8, 16, width, height, &bmv[1]);
1972 case VP8_SPLITMVMODE_8x8:
1973 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1974 0, 0, 8, 8, width, height, &bmv[0]);
1975 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1976 8, 0, 8, 8, width, height, &bmv[1]);
1977 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1978 0, 8, 8, 8, width, height, &bmv[2]);
1979 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1980 8, 8, 8, 8, width, height, &bmv[3]);
1985 static av_always_inline
1986 void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
1990 if (mb->mode != MODE_I4x4) {
1991 uint8_t *y_dst = dst[0];
1992 for (y = 0; y < 4; y++) {
1993 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
1995 if (nnz4 & ~0x01010101) {
1996 for (x = 0; x < 4; x++) {
1997 if ((uint8_t) nnz4 == 1)
1998 s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
2001 else if ((uint8_t) nnz4 > 1)
2002 s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
2010 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
2013 y_dst += 4 * s->linesize;
2017 for (ch = 0; ch < 2; ch++) {
2018 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
2020 uint8_t *ch_dst = dst[1 + ch];
2021 if (nnz4 & ~0x01010101) {
2022 for (y = 0; y < 2; y++) {
2023 for (x = 0; x < 2; x++) {
2024 if ((uint8_t) nnz4 == 1)
2025 s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
2026 td->block[4 + ch][(y << 1) + x],
2028 else if ((uint8_t) nnz4 > 1)
2029 s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
2030 td->block[4 + ch][(y << 1) + x],
2034 goto chroma_idct_end;
2036 ch_dst += 4 * s->uvlinesize;
2039 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
2047 static av_always_inline
2048 void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
2049 VP8FilterStrength *f, int is_vp7)
2051 int interior_limit, filter_level;
2053 if (s->segmentation.enabled) {
2054 filter_level = s->segmentation.filter_level[mb->segment];
2055 if (!s->segmentation.absolute_vals)
2056 filter_level += s->filter.level;
2058 filter_level = s->filter.level;
2060 if (s->lf_delta.enabled) {
2061 filter_level += s->lf_delta.ref[mb->ref_frame];
2062 filter_level += s->lf_delta.mode[mb->mode];
2065 filter_level = av_clip_uintp2(filter_level, 6);
2067 interior_limit = filter_level;
2068 if (s->filter.sharpness) {
2069 interior_limit >>= (s->filter.sharpness + 3) >> 2;
2070 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2072 interior_limit = FFMAX(interior_limit, 1);
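    /* illustrative numbers: filter_level 32 with sharpness 4 gives
     * interior_limit = FFMAX(FFMIN(32 >> ((4 + 3) >> 2), 9 - 4), 1) = 5 */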
2074 f->filter_level = filter_level;
2075 f->inner_limit = interior_limit;
2076 f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2077 mb->mode == VP8_MVMODE_SPLIT;
2080 static av_always_inline
2081 void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2082 int mb_x, int mb_y, int is_vp7)
2084 int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2085 int filter_level = f->filter_level;
2086 int inner_limit = f->inner_limit;
2087 int inner_filter = f->inner_filter;
2088 ptrdiff_t linesize = s->linesize;
2089 ptrdiff_t uvlinesize = s->uvlinesize;
2090 static const uint8_t hev_thresh_lut[2][64] = {
2091 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2092 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2093 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2094 3, 3, 3, 3 },
2095 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2096 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2097 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2098 2, 2, 2, 2 }
2099 };
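/* hev_thresh_lut maps the clamped filter level to the high-edge-variance
 * threshold, indexed by s->keyframe: row [1] (key frames) tops out at 2,
 * row [0] (inter frames) at 3. E.g. filter_level 40 yields hev_thresh 2 on
 * a key frame and 3 otherwise. */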
2104 if (is_vp7) {
2105 bedge_lim_y = filter_level;
2106 bedge_lim_uv = filter_level * 2;
2107 mbedge_lim = filter_level + 2;
2108 } else {
2109 bedge_lim_y =
2110 bedge_lim_uv = filter_level * 2 + inner_limit;
2111 mbedge_lim = bedge_lim_y + 4;
2112 }
2114 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
2117 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2118 mbedge_lim, inner_limit, hev_thresh);
2119 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2120 mbedge_lim, inner_limit, hev_thresh);
2123 #define H_LOOP_FILTER_16Y_INNER(cond) \
2124 if (cond && inner_filter) { \
2125 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2126 bedge_lim_y, inner_limit, \
2128 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2129 bedge_lim_y, inner_limit, \
2131 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2132 bedge_lim_y, inner_limit, \
2134 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2135 uvlinesize, bedge_lim_uv, \
2136 inner_limit, hev_thresh); \
2139 H_LOOP_FILTER_16Y_INNER(!is_vp7)
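/* The same inner column-edge filtering runs at two possible points: VP8 does
 * it right after the macroblock's left edge (the call above), while VP7 defers
 * it until after the row edges (the call at the end of this function); the
 * cond argument selects which invocation is active. */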
2142 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2143 mbedge_lim, inner_limit, hev_thresh);
2144 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2145 mbedge_lim, inner_limit, hev_thresh);
2149 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
2150 linesize, bedge_lim_y,
2151 inner_limit, hev_thresh);
2152 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
2153 linesize, bedge_lim_y,
2154 inner_limit, hev_thresh);
2155 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2156 linesize, bedge_lim_y,
2157 inner_limit, hev_thresh);
2158 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2159 dst[2] + 4 * uvlinesize,
2160 uvlinesize, bedge_lim_uv,
2161 inner_limit, hev_thresh);
2164 H_LOOP_FILTER_16Y_INNER(is_vp7)
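/* Simple-filter variant: when the frame header selects the simple loop
 * filter, only the luma plane is filtered and no high-edge-variance
 * threshold is used. */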
2167 static av_always_inline
2168 void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2171 int mbedge_lim, bedge_lim;
2172 int filter_level = f->filter_level;
2173 int inner_limit = f->inner_limit;
2174 int inner_filter = f->inner_filter;
2175 ptrdiff_t linesize = s->linesize;
2180 bedge_lim = 2 * filter_level + inner_limit;
2181 mbedge_lim = bedge_lim + 4;
2184 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2186 s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2187 s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2188 s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2192 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2194 s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2195 s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2196 s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
2200 #define MARGIN (16 << 2)
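/* MARGIN is one macroblock (16 pixels) expressed in the units used for MV
 * clamping (4 per pixel, matching the << 6 scaling of macroblock indices
 * below): motion vectors may point up to 16 pixels outside the frame, which
 * is handled by edge emulation in the MC functions. */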
2201 static av_always_inline
2202 void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2203 VP8Frame *prev_frame, int is_vp7)
2205 VP8Context *s = avctx->priv_data;
2208 s->mv_min.y = -MARGIN;
2209 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2210 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
2211 VP8Macroblock *mb = s->macroblocks_base +
2212 ((s->mb_width + 1) * (mb_y + 1) + 1);
2213 int mb_xy = mb_y * s->mb_width;
2215 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2217 s->mv_min.x = -MARGIN;
2218 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2219 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2221 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2222 DC_PRED * 0x01010101);
2223 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2224 prev_frame && prev_frame->seg_map ?
2225 prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
2234 static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2235 VP8Frame *prev_frame)
2237 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
2240 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2241 VP8Frame *prev_frame)
2243 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
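/* Slice-threading synchronisation: each thread publishes its progress as
 * (mb_y << 16) | mb_x, so a single integer compare orders positions row-major
 * (e.g. row 3, column 5 packs to 0x00030005). check_thread_pos() blocks until
 * the other thread has passed the given position; update_pos() publishes a
 * new position and wakes any waiter. */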
2246 #if HAVE_THREADS
2247 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
2248 do { \
2249 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
2250 if (otd->thread_mb_pos < tmp) { \
2251 pthread_mutex_lock(&otd->lock); \
2252 td->wait_mb_pos = tmp; \
2253 do { \
2254 if (otd->thread_mb_pos >= tmp) \
2255 break; \
2256 pthread_cond_wait(&otd->cond, &otd->lock); \
2257 } while (1); \
2258 td->wait_mb_pos = INT_MAX; \
2259 pthread_mutex_unlock(&otd->lock); \
2260 } \
2261 } while (0)
2263 #define update_pos(td, mb_y, mb_x) \
2264 do { \
2265 int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
2266 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2267 (num_jobs > 1); \
2268 int is_null = !next_td || !prev_td; \
2269 int pos_check = (is_null) ? 1 \
2270 : (next_td != td && \
2271 pos >= next_td->wait_mb_pos) || \
2272 (prev_td != td && \
2273 pos >= prev_td->wait_mb_pos); \
2274 td->thread_mb_pos = pos; \
2275 if (sliced_threading && pos_check) { \
2276 pthread_mutex_lock(&td->lock); \
2277 pthread_cond_broadcast(&td->cond); \
2278 pthread_mutex_unlock(&td->lock); \
2279 } \
2280 } while (0)
2281 #else
2282 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0)
2283 #define update_pos(td, mb_y, mb_x) while(0)
2284 #endif
2286 static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2287 int jobnr, int threadnr, int is_vp7)
2289 VP8Context *s = avctx->priv_data;
2290 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
2291 int mb_y = td->thread_mb_pos >> 16;
2292 int mb_x, mb_xy = mb_y * s->mb_width;
2293 int num_jobs = s->num_jobs;
2294 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
2295 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
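/* The token data is split into 1, 2, 4 or 8 independent partitions (per the
 * frame header); macroblock rows are assigned to them round-robin, and since
 * the count is a power of two the mask is equivalent to a modulo. */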
2296 VP8Macroblock *mb;
2297 uint8_t *dst[3] = {
2298 curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2299 curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
2300 curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
2301 };
2303 if (c->end <= c->buffer && c->bits >= 0)
2304 return AVERROR_INVALIDDATA;
2309 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2310 if (mb_y == s->mb_height - 1)
2311 next_td = td;
2312 else
2313 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2314 if (s->mb_layout == 1)
2315 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2316 else {
2317 // Make sure the previous frame has read its segmentation map,
2318 // if we re-use the same map.
2319 if (prev_frame && s->segmentation.enabled &&
2320 !s->segmentation.update_map)
2321 ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
2322 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2323 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2324 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2325 }
2327 if (!is_vp7 || mb_y == 0)
2328 memset(td->left_nnz, 0, sizeof(td->left_nnz));
2330 s->mv_min.x = -MARGIN;
2331 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2333 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2334 if (c->end <= c->buffer && c->bits >= 0)
2335 return AVERROR_INVALIDDATA;
2336 // Wait for previous thread to read mb_x+2, and reach mb_y-1.
2337 if (prev_td != td) {
2338 if (threadnr != 0) {
2339 check_thread_pos(td, prev_td,
2340 mb_x + (is_vp7 ? 2 : 1),
2341 mb_y - (is_vp7 ? 2 : 1));
2343 check_thread_pos(td, prev_td,
2344 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2345 mb_y - (is_vp7 ? 2 : 1));
2349 s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2351 s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2352 dst[2] - dst[1], 2);
2355 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2356 prev_frame && prev_frame->seg_map ?
2357 prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2359 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2362 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
2364 if (mb->mode <= MODE_I4x4)
2365 intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2366 else
2367 inter_predict(s, td, dst, mb, mb_x, mb_y);
2369 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2371 if (!mb->skip) {
2372 idct_mb(s, td, dst, mb);
2373 } else {
2374 AV_ZERO64(td->left_nnz);
2375 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2377 /* Reset DC block predictors if they would exist
2378 * if the mb had coefficients */
2379 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2380 td->left_nnz[8] = 0;
2381 s->top_nnz[mb_x][8] = 0;
2382 }
2383 }
2385 if (s->deblock_filter)
2386 filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
2388 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
2389 if (s->filter.simple)
2390 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2391 NULL, NULL, s->linesize, 0, 1);
2392 else
2393 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2394 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2397 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
2405 if (mb_x == s->mb_width + 1) {
2406 update_pos(td, mb_y, s->mb_width + 3);
2407 } else {
2408 update_pos(td, mb_y, mb_x);
2409 }
2414 static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2415 int jobnr, int threadnr)
2417 return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
2420 static int vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2421 int jobnr, int threadnr)
2423 return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
2426 static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
2427 int jobnr, int threadnr, int is_vp7)
2429 VP8Context *s = avctx->priv_data;
2430 VP8ThreadData *td = &s->thread_data[threadnr];
2431 int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
2432 AVFrame *curframe = s->curframe->tf.f;
2434 VP8ThreadData *prev_td, *next_td;
2435 uint8_t *dst[3] = {
2436 curframe->data[0] + 16 * mb_y * s->linesize,
2437 curframe->data[1] + 8 * mb_y * s->uvlinesize,
2438 curframe->data[2] + 8 * mb_y * s->uvlinesize
2439 };
2441 if (s->mb_layout == 1)
2442 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2443 else
2444 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2449 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2450 if (mb_y == s->mb_height - 1)
2451 next_td = td;
2452 else
2453 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2455 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
2456 VP8FilterStrength *f = &td->filter_strength[mb_x];
2458 check_thread_pos(td, prev_td,
2459 (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
2461 if (next_td != &s->thread_data[0])
2462 check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
2464 if (num_jobs == 1) {
2465 if (s->filter.simple)
2466 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2467 NULL, NULL, s->linesize, 0, 1);
2468 else
2469 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2470 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2473 if (s->filter.simple)
2474 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2475 else
2476 filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
2481 update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
2485 static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
2486 int jobnr, int threadnr)
2488 filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
2491 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
2492 int jobnr, int threadnr)
2494 filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
2497 static av_always_inline
2498 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2499 int threadnr, int is_vp7)
2501 VP8Context *s = avctx->priv_data;
2502 VP8ThreadData *td = &s->thread_data[jobnr];
2503 VP8ThreadData *next_td = NULL, *prev_td = NULL;
2504 VP8Frame *curframe = s->curframe;
2505 int mb_y, num_jobs = s->num_jobs;
2508 td->thread_nr = threadnr;
2509 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
2510 td->thread_mb_pos = mb_y << 16;
2511 ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
2512 if (ret < 0) {
2513 update_pos(td, s->mb_height, INT_MAX & 0xFFFF);
2514 return ret;
2515 }
2516 if (s->deblock_filter)
2517 s->filter_mb_row(avctx, tdata, jobnr, threadnr);
2518 update_pos(td, mb_y, INT_MAX & 0xFFFF);
2523 if (avctx->active_thread_type == FF_THREAD_FRAME)
2524 ff_thread_report_progress(&curframe->tf, mb_y, 0);
2530 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2531 int jobnr, int threadnr)
2533 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
2536 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2537 int jobnr, int threadnr)
2539 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
2543 static av_always_inline
2544 int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2545 AVPacket *avpkt, int is_vp7)
2547 VP8Context *s = avctx->priv_data;
2548 int ret, i, referenced, num_jobs;
2549 enum AVDiscard skip_thresh;
2550 VP8Frame *av_uninit(curframe), *prev_frame;
2552 if (is_vp7)
2553 ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2554 else
2555 ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
2560 prev_frame = s->framep[VP56_FRAME_CURRENT];
2562 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2563 s->update_altref == VP56_FRAME_CURRENT;
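/* A frame is "referenced" if it will update the last, golden or altref slot;
 * only such frames must be decoded when frame skipping is enabled, so
 * purely-displayed frames can be dropped at AVDISCARD_NONREF. */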
2565 skip_thresh = !referenced ? AVDISCARD_NONREF
2566 : !s->keyframe ? AVDISCARD_NONKEY
2567 : AVDISCARD_ALL;
2569 if (avctx->skip_frame >= skip_thresh) {
2570 s->invisible = 1;
2571 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2572 goto skip_decode;
2573 }
2574 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2576 // release no longer referenced frames
2577 for (i = 0; i < 5; i++)
2578 if (s->frames[i].tf.f->data[0] &&
2579 &s->frames[i] != prev_frame &&
2580 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2581 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
2582 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2583 vp8_release_frame(s, &s->frames[i]);
2585 curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
2588 avctx->colorspace = AVCOL_SPC_BT470BG;
2590 avctx->color_range = AVCOL_RANGE_JPEG;
2592 avctx->color_range = AVCOL_RANGE_MPEG;
2594 /* Given that arithmetic probabilities are updated every frame, it's quite
2595 * likely that the values we have on a random interframe are complete
2596 * junk if we didn't start decode on a keyframe. So just don't display
2597 * anything rather than junk. */
2598 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2599 !s->framep[VP56_FRAME_GOLDEN] ||
2600 !s->framep[VP56_FRAME_GOLDEN2])) {
2601 av_log(avctx, AV_LOG_WARNING,
2602 "Discarding interframe without a prior keyframe!\n");
2603 ret = AVERROR_INVALIDDATA;
2604 goto err;
2607 curframe->tf.f->key_frame = s->keyframe;
2608 curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2609 : AV_PICTURE_TYPE_P;
2610 if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
2613 // check if golden and altref are swapped
2614 if (s->update_altref != VP56_FRAME_NONE)
2615 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2616 else
2617 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2619 if (s->update_golden != VP56_FRAME_NONE)
2620 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2621 else
2622 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
2624 if (s->update_last)
2625 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2626 else
2627 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2629 s->next_framep[VP56_FRAME_CURRENT] = curframe;
2631 if (avctx->codec->update_thread_context)
2632 ff_thread_finish_setup(avctx);
2634 s->linesize = curframe->tf.f->linesize[0];
2635 s->uvlinesize = curframe->tf.f->linesize[1];
2637 memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2638 /* Zero macroblock structures for top/top-left prediction
2639 * from outside the frame. */
2640 if (!s->mb_layout)
2641 memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2642 (s->mb_width + 1) * sizeof(*s->macroblocks));
2643 if (!s->mb_layout && s->keyframe)
2644 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2646 memset(s->ref_count, 0, sizeof(s->ref_count));
2648 if (s->mb_layout == 1) {
2649 // Make sure the previous frame has read its segmentation map,
2650 // if we re-use the same map.
2651 if (prev_frame && s->segmentation.enabled &&
2652 !s->segmentation.update_map)
2653 ff_thread_await_progress(&prev_frame->tf, 1, 0);
2654 if (is_vp7)
2655 vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2656 else
2657 vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2658 }
2660 if (avctx->active_thread_type == FF_THREAD_FRAME)
2661 num_jobs = 1;
2662 else
2663 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2664 s->num_jobs = num_jobs;
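/* With slice threading, one job is created per token partition (capped by the
 * thread count) and job n decodes macroblock rows n, n + num_jobs, ...;
 * frame threading decodes each frame with a single job. */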
2665 s->curframe = curframe;
2666 s->prev_frame = prev_frame;
2667 s->mv_min.y = -MARGIN;
2668 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2669 for (i = 0; i < MAX_THREADS; i++) {
2670 s->thread_data[i].thread_mb_pos = 0;
2671 s->thread_data[i].wait_mb_pos = INT_MAX;
2673 if (is_vp7)
2674 avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2675 num_jobs);
2676 else
2677 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
2678 num_jobs);
2680 ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
2681 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2683 skip_decode:
2684 // if future frames don't use the updated probabilities,
2685 // reset them to the values we saved
2686 if (!s->update_probabilities)
2687 s->prob[0] = s->prob[1];
2689 if (!s->invisible) {
2690 if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
2691 return ret;
2692 *got_frame = 1;
2693 }
2695 return avpkt->size;
2696 err:
2697 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2698 return ret;
2699 }
2701 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2704 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
2707 #if CONFIG_VP7_DECODER
2708 static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2711 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
2713 #endif /* CONFIG_VP7_DECODER */
2715 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2717 VP8Context *s = avctx->priv_data;
2723 vp8_decode_flush_impl(avctx, 1);
2724 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2725 av_frame_free(&s->frames[i].tf.f);
2730 static av_cold int vp8_init_frames(VP8Context *s)
2733 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2734 s->frames[i].tf.f = av_frame_alloc();
2735 if (!s->frames[i].tf.f)
2736 return AVERROR(ENOMEM);
2741 static av_always_inline
2742 int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2744 VP8Context *s = avctx->priv_data;
2748 s->vp7 = avctx->codec->id == AV_CODEC_ID_VP7;
2749 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2750 avctx->internal->allocate_progress = 1;
2752 ff_videodsp_init(&s->vdsp, 8);
2754 ff_vp78dsp_init(&s->vp8dsp);
2755 if (CONFIG_VP7_DECODER && is_vp7) {
2756 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2757 ff_vp7dsp_init(&s->vp8dsp);
2758 s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
2759 s->filter_mb_row = vp7_filter_mb_row;
2760 } else if (CONFIG_VP8_DECODER && !is_vp7) {
2761 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2762 ff_vp8dsp_init(&s->vp8dsp);
2763 s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
2764 s->filter_mb_row = vp8_filter_mb_row;
2767 /* does not change for VP8 */
2768 memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
2770 if ((ret = vp8_init_frames(s)) < 0) {
2771 ff_vp8_decode_free(avctx);
2778 #if CONFIG_VP7_DECODER
2779 static int vp7_decode_init(AVCodecContext *avctx)
2781 return vp78_decode_init(avctx, IS_VP7);
2783 #endif /* CONFIG_VP7_DECODER */
2785 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2787 return vp78_decode_init(avctx, IS_VP8);
2790 #if CONFIG_VP8_DECODER
2791 #if HAVE_THREADS
2792 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2794 VP8Context *s = avctx->priv_data;
2799 if ((ret = vp8_init_frames(s)) < 0) {
2800 ff_vp8_decode_free(avctx);
2807 #define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
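/* REBASE translates a frame pointer from the source thread's context into the
 * corresponding entry of this context's frames[] array when decoder state is
 * copied between frame threads. */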
2809 static int vp8_decode_update_thread_context(AVCodecContext *dst,
2810 const AVCodecContext *src)
2812 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2815 if (s->macroblocks_base &&
2816 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2817 free_buffers(s);
2818 s->mb_width = s_src->mb_width;
2819 s->mb_height = s_src->mb_height;
2820 }
2822 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2823 s->segmentation = s_src->segmentation;
2824 s->lf_delta = s_src->lf_delta;
2825 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2827 for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2828 if (s_src->frames[i].tf.f->data[0]) {
2829 int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
2835 s->framep[0] = REBASE(s_src->next_framep[0]);
2836 s->framep[1] = REBASE(s_src->next_framep[1]);
2837 s->framep[2] = REBASE(s_src->next_framep[2]);
2838 s->framep[3] = REBASE(s_src->next_framep[3]);
2842 #endif /* HAVE_THREADS */
2843 #endif /* CONFIG_VP8_DECODER */
2845 #if CONFIG_VP7_DECODER
2846 AVCodec ff_vp7_decoder = {
2847 .name = "vp7",
2848 .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"),
2849 .type = AVMEDIA_TYPE_VIDEO,
2850 .id = AV_CODEC_ID_VP7,
2851 .priv_data_size = sizeof(VP8Context),
2852 .init = vp7_decode_init,
2853 .close = ff_vp8_decode_free,
2854 .decode = vp7_decode_frame,
2855 .capabilities = AV_CODEC_CAP_DR1,
2856 .flush = vp8_decode_flush,
2858 #endif /* CONFIG_VP7_DECODER */
2860 #if CONFIG_VP8_DECODER
2861 AVCodec ff_vp8_decoder = {
2862 .name = "vp8",
2863 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2864 .type = AVMEDIA_TYPE_VIDEO,
2865 .id = AV_CODEC_ID_VP8,
2866 .priv_data_size = sizeof(VP8Context),
2867 .init = ff_vp8_decode_init,
2868 .close = ff_vp8_decode_free,
2869 .decode = ff_vp8_decode_frame,
2870 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
2871 AV_CODEC_CAP_SLICE_THREADS,
2872 .flush = vp8_decode_flush,
2873 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2874 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2876 #endif /* CONFIG_VP8_DECODER */