/*
 * VP7/VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Fiona Glaser
 * Copyright (C) 2012 Daniel Kang
 * Copyright (C) 2014 Peter Ross
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "libavutil/imgutils.h"

#include "avcodec.h"
#include "internal.h"
#include "mathops.h"
#include "rectangle.h"
#include "thread.h"
#include "vp8.h"
#include "vp8data.h"
#if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
#define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
#elif CONFIG_VP7_DECODER
#define VPX(vp7, f) vp7_ ## f
#else // CONFIG_VP8_DECODER
#define VPX(vp7, f) vp8_ ## f
#endif
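
/* Usage sketch (illustrative): with both decoders enabled,
 * VPX(is_vp7, update_dimensions)(s, width, height) selects between
 * vp7_update_dimensions() and vp8_update_dimensions() at runtime; with only
 * one decoder configured, the macro collapses to a direct call so the other
 * symbol is never referenced. */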
static void free_buffers(VP8Context *s)
{
    int i;
    if (s->thread_data)
        for (i = 0; i < MAX_THREADS; i++) {
#if HAVE_THREADS
            pthread_cond_destroy(&s->thread_data[i].cond);
            pthread_mutex_destroy(&s->thread_data[i].lock);
#endif
            av_freep(&s->thread_data[i].filter_strength);
        }
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}
static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    int ret;
    if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                    ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
        return ret;
    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
        ff_thread_release_buffer(s->avctx, &f->tf);
        return AVERROR(ENOMEM);
    }
    return 0;
}
static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
}
#if CONFIG_VP8_DECODER
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;
    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }

    return 0;
}
#endif /* CONFIG_VP8_DECODER */
static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(s, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem)
        free_buffers(s);
}
static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0);
}
static VP8Frame *vp8_find_free_buffer(VP8Context *s)
{
    VP8Frame *frame = NULL;
    int i;

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT]  &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            frame = &s->frames[i];
            break;
        }
    if (i == 5) {
        av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (frame->tf.f->data[0])
        vp8_release_frame(s, frame);

    return frame;
}
static av_always_inline
int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
{
    AVCodecContext *avctx = s->avctx;
    int i, ret;

    if (width  != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
        height != s->avctx->height) {
        vp8_decode_flush_impl(s->avctx, 1);

        ret = ff_set_dimensions(s->avctx, width, height);
        if (ret < 0)
            return ret;
    }

    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
    s->mb_height = (s->avctx->coded_height + 15) / 16;

    s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
                   avctx->thread_count > 1;
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
                                               sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
    } else // Sliced threading
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
                                         sizeof(*s->macroblocks));
    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));

    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
        free_buffers(s);
        return AVERROR(ENOMEM);
    }

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength =
            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
        if (!s->thread_data[i].filter_strength) {
            free_buffers(s);
            return AVERROR(ENOMEM);
        }
#if HAVE_THREADS
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
    }

    s->macroblocks = s->macroblocks_base + 1;

    return 0;
}
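
/* Layout note (inferred from the allocations above): with slice threading
 * (mb_layout set) the macroblocks form a padded (mb_width + 2) x
 * (mb_height + 2) grid, so neighbours are plain 2-D offsets; in the
 * single-thread/frame-threading case only mb_width + 2 * mb_height + 1
 * entries are kept and top/left neighbours are reached through small fixed
 * offsets from the current macroblock (see vp8_decode_mvs() below). */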
static int vp7_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP7);
}

static int vp8_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP8);
}
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map = vp8_rac_get(c);

    if (vp8_rac_get(c)) { // update segment feature data
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i, ret;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    buf      += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
        int size = AV_RL24(sizes + 3 * i);
        if (buf_size - size < 0)
            return -1;

        ret = ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        if (ret < 0)
            return ret;
        buf      += size;
        buf_size -= size;
    }
    return ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
}
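
/* Layout read by setup_partitions(): a 2-bit log2 partition count, then the
 * sizes of all but the last coefficient partition as 3-byte little-endian
 * values (the AV_RL24 reads above), then the partition payloads themselves;
 * the last partition takes whatever buffer remains. E.g. 4 partitions are
 * preceded by 3 * 3 = 9 size bytes. */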
static void vp7_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int yac_qi  = vp8_rac_get_uint(c, 7);
    int ydc_qi  = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;

    s->qmat[0].luma_qmul[0]    = vp7_ydc_qlookup[ydc_qi];
    s->qmat[0].luma_qmul[1]    = vp7_yac_qlookup[yac_qi];
    s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
    s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
    s->qmat[0].chroma_qmul[0]  = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
    s->qmat[0].chroma_qmul[1]  = vp7_yac_qlookup[uvac_qi];
}
static void vp8_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;
        } else
            base_qi = yac_qi;

        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta, 7)];
        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)];
        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)] * 2;
        /* 101581 >> 16 is equivalent to 155 / 100 (1.55 * 65536 = 101580.8) */
        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] * 101581 >> 16;
        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];

        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}
/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does.
 *
 * Intra frames update all 3 references.
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set.
 * If the update (golden|altref) flag is set, it's updated with the current frame
 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 *   0: no update
 *   1: VP56_FRAME_PREVIOUS
 *   2: update golden with altref, or update altref with golden
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;

    if (update)
        return VP56_FRAME_CURRENT;

    switch (vp8_rac_get_uint(c, 2)) {
    case 1:
        return VP56_FRAME_PREVIOUS;
    case 2:
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    }
    return VP56_FRAME_NONE;
}
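
/* Example (following the table above): on an inter frame with the
 * update_golden flag unset, reading the value 2 refreshes golden from altref;
 * the symmetric call for altref in update_refs() below would refresh altref
 * from golden. */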
static void vp78_reset_probability_tables(VP8Context *s)
{
    int i, j;
    for (i = 0; i < 4; i++)
        for (j = 0; j < 16; j++)
            memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                   sizeof(s->prob->token[i][j]));
}
static void vp78_update_probability_tables(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, j, k, l, m;

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS - 1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }
}
#define VP7_MVC_SIZE 17
#define VP8_MVC_SIZE 19

static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
                                                            int mvc_size)
{
    VP56RangeCoder *c = &s->c;
    int i, j;

    if (vp8_rac_get(c))
        for (i = 0; i < 4; i++)
            s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
    if (vp8_rac_get(c))
        for (i = 0; i < 3; i++)
            s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);

    // 17.2 MV probability update
    for (i = 0; i < 2; i++)
        for (j = 0; j < mvc_size; j++)
            if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                s->prob->mvc[i][j] = vp8_rac_get_nn(c);
}
static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}
static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
{
    int i, j;

    for (j = 1; j < 3; j++) {
        for (i = 0; i < height / 2; i++)
            memcpy(dst->data[j] + i * dst->linesize[j],
                   src->data[j] + i * src->linesize[j], width / 2);
    }
}
static void fade(uint8_t *dst, ptrdiff_t dst_linesize,
                 const uint8_t *src, ptrdiff_t src_linesize,
                 int width, int height,
                 int alpha, int beta)
{
    int i, j;
    for (j = 0; j < height; j++) {
        for (i = 0; i < width; i++) {
            uint8_t y = src[j * src_linesize + i];
            dst[j * dst_linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
        }
    }
}
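
/* fade() applies a per-pixel brightness/contrast ramp to the luma plane:
 * out = clip(y + y * beta / 256 + alpha), i.e. roughly
 * y * (256 + beta) / 256 + alpha, with alpha and beta signalled as signed
 * 8-bit values in the VP7 header (section E below). */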
static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
{
    int alpha = (int8_t) vp8_rac_get_uint(c, 8);
    int beta  = (int8_t) vp8_rac_get_uint(c, 8);
    int ret;

    if (!s->keyframe && (alpha || beta)) {
        int width  = s->mb_width * 16;
        int height = s->mb_height * 16;
        AVFrame *src, *dst;

        if (!s->framep[VP56_FRAME_PREVIOUS] ||
            !s->framep[VP56_FRAME_GOLDEN]) {
            av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
            return AVERROR_INVALIDDATA;
        }

        dst = src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

        /* preserve the golden frame, write a new previous frame */
        if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
            s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
            if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
                return ret;

            dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

            copy_chroma(dst, src, width, height);
        }

        fade(dst->data[0], dst->linesize[0],
             src->data[0], src->linesize[0],
             width, height, alpha, beta);
    }

    return 0;
}
static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int part1_size, hscale, vscale, i, j, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    if (buf_size < 4) {
        return AVERROR_INVALIDDATA;
    }

    s->profile = (buf[0] >> 1) & 7;
    if (s->profile > 1) {
        avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe = !(buf[0] & 1);

    part1_size = AV_RL24(buf) >> 4;

    if (buf_size < 4 - s->profile + part1_size) {
        av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed: %d\n",
               buf_size, 4 - s->profile + part1_size);
        return AVERROR_INVALIDDATA;
    }

    buf      += 4 - s->profile;
    buf_size -= 4 - s->profile;

    memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));

    ret = ff_vp56_init_range_decoder(c, buf, part1_size);
    if (ret < 0)
        return ret;
    buf      += part1_size;
    buf_size -= part1_size;

    /* A. Dimension information (keyframes only) */
    if (s->keyframe) {
        width  = vp8_rac_get_uint(c, 12);
        height = vp8_rac_get_uint(c, 12);
        hscale = vp8_rac_get_uint(c, 2);
        vscale = vp8_rac_get_uint(c, 2);
        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        for (i = 0; i < 2; i++)
            memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
                   sizeof(vp7_mv_default_prob[i]));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
        memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
    }

    if (s->keyframe || s->profile > 0)
        memset(s->inter_dc_pred, 0, sizeof(s->inter_dc_pred));

    /* B. Decoding information for all four macroblock-level features */
    for (i = 0; i < 4; i++) {
        s->feature_enabled[i] = vp8_rac_get(c);
        if (s->feature_enabled[i]) {
            s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);

            for (j = 0; j < 3; j++)
                s->feature_index_prob[i][j] =
                    vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;

            if (vp7_feature_value_size[s->profile][i])
                for (j = 0; j < 4; j++)
                    s->feature_value[i][j] =
                        vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
        }
    }

    s->segmentation.enabled    = 0;
    s->segmentation.update_map = 0;
    s->lf_delta.enabled        = 0;

    s->num_coeff_partitions = 1;
    ret = ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
    if (ret < 0)
        return ret;

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
        if ((ret = vp7_update_dimensions(s, width, height)) < 0)
            return ret;
    }

    /* C. Dequantization indices */
    vp7_get_quants(s);

    /* D. Golden frame update flag (a Flag) for interframes only */
    if (!s->keyframe) {
        s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
        s->sign_bias[VP56_FRAME_GOLDEN] = 0;
    }

    s->update_last          = 1;
    s->update_probabilities = 1;
    s->fade_present         = 1;

    if (s->profile > 0) {
        s->update_probabilities = vp8_rac_get(c);
        if (!s->update_probabilities)
            s->prob[1] = s->prob[0];

        if (!s->keyframe)
            s->fade_present = vp8_rac_get(c);
    }

    /* E. Fading information for previous frame */
    if (s->fade_present && vp8_rac_get(c)) {
        if ((ret = vp7_fade_frame(s, c)) < 0)
            return ret;
    }

    /* F. Loop filter type */
    if (!s->profile)
        s->filter.simple = vp8_rac_get(c);

    /* G. DCT coefficient ordering specification */
    if (vp8_rac_get(c))
        for (i = 1; i < 16; i++)
            s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)];

    /* H. Loop filter levels */
    if (s->profile > 0)
        s->filter.simple = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
    vp78_update_probability_tables(s);

    s->mbskip_enabled = 0;

    /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
    if (!s->keyframe) {
        s->prob->intra = vp8_rac_get_uint(c, 8);
        s->prob->last  = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
    }

    return 0;
}
static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    if (buf_size < 3) {
        av_log(s->avctx, AV_LOG_ERROR, "Insufficient data (%d) for header\n", buf_size);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->profile   = (buf[0] >> 1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
               sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
               sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7 * s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf + 3) & 0x3fff;
        height = AV_RL16(buf + 5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc, vp8_mv_default_prob,
               sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ret = ff_vp56_init_range_decoder(c, buf, header_size);
    if (ret < 0)
        return ret;
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        s->colorspace = vp8_rac_get(c);
        if (s->colorspace)
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        s->fullrange = vp8_rac_get(c);
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height)
        if ((ret = vp8_update_dimensions(s, width, height)) < 0)
            return ret;

    vp8_get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    vp78_update_probability_tables(s);

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
    }

    return 0;
}
static av_always_inline
void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src)
{
    dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
                     av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
    dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
                     av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));
}
/**
 * Motion vector coding, 17.1.
 */
static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int i;

        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = (vp7 ? 7 : 9); i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p + 2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3 * bit;
        x  += 4 * bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2 * bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}
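
/* Note on the long form above: bits 0-2 are coded first, then bits 9..4
 * (VP7: 7..4) from high to low; bit 3 (the "x += 8") is forced to 1 whenever
 * no higher bit is set, so long-form values never overlap the 0-7 range
 * handled by the small-mv tree in the else branch. */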
static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 1);
}

static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 0);
}
static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
{
    if (is_vp7)
        return vp7_submv_prob;

    if (left == top)
        return vp8_submv_prob[4 - !!left];
    if (!top)
        return vp8_submv_prob[2];
    return vp8_submv_prob[1 - !!left];
}
/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                    int layout, int is_vp7)
{
    int part_idx;
    int n, num;
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
    VP56mv *top_mv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width - 1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv       = top_mb->bmv;

    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        else
            part_idx = VP8_SPLITMVMODE_8x8;
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num          = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx];
    firstidx     = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above, is_vp7);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y +
                                   read_mv_component(c, s->prob->mvc[0], is_vp7);
                    mb->bmv[n].x = mb->mv.x +
                                   read_mv_component(c, s->prob->mvc[1], is_vp7);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}
/**
 * The vp7 reference decoder uses a padding macroblock column (added to right
 * edge of the frame) to guard against illegal macroblock offsets. The
 * algorithm has bugs that permit offsets to straddle the padding column.
 * This function replicates those bugs.
 *
 * @param[out] edge_x macroblock x address
 * @param[out] edge_y macroblock y address
 *
 * @return macroblock offset legal (boolean)
 */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    int vwidth = mb_width + 1;
    int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
    if (new < boundary || new % vwidth == vwidth - 1)
        return 0;
    *edge_y = new / vwidth;
    *edge_x = new % vwidth;
    return 1;
}
static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
{
    return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
}
static av_always_inline
void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[12];
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    VP56mv near_mv[3];
    uint8_t cnt[3] = { 0 };
    VP56RangeCoder *c = &s->c;
    int i;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
        const VP7MVPred *pred = &vp7_mv_pred[i];
        int edge_x, edge_y;

        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
            VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
                                             ? s->macroblocks_base + 1 + edge_x +
                                               (s->mb_width + 1) * (edge_y + 1)
                                             : s->macroblocks + edge_x +
                                               (s->mb_height - edge_y - 1) * 2;
            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
            if (mv) {
                if (AV_RN32A(&near_mv[CNT_NEAREST])) {
                    if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
                        idx = CNT_NEAREST;
                    } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
                        if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
                            continue;
                        idx = CNT_NEAR;
                    } else {
                        AV_WN32A(&near_mv[CNT_NEAR], mv);
                        idx = CNT_NEAR;
                    }
                } else {
                    AV_WN32A(&near_mv[CNT_NEAREST], mv);
                    idx = CNT_NEAREST;
                }
            } else {
                idx = CNT_ZERO;
            }
        } else {
            idx = CNT_ZERO;
        }
        cnt[idx] += vp7_mv_pred[i].score;
    }

    mb->partitioning = VP8_SPLITMVMODE_NONE;

    if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {

            if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {

                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
                else
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR]    ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));

                if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
                } else {
                    mb->mv.y  += vp7_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x  += vp7_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                mb->mv     = near_mv[CNT_NEAR];
                mb->bmv[0] = mb->mv;
            }
        } else {
            mb->mv     = near_mv[CNT_NEAREST];
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
static av_always_inline
void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
                                  mb - 1 /* left */,
                                  0      /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    } else {
        mb_edge[0] = mb - s->mb_width - 1;
        mb_edge[2] = mb - s->mb_width - 2;
    }

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
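    /* The SWAR negate in MV_EDGE_CHECK below flips both 16-bit MV components
     * with one 32-bit operation: per lane, -x == ~x + 1, where the +1 is
     * added only to the low 15 bits (mv & 0x7fff7fff) so no carry can leak
     * into the neighbouring component, and the sign bits are folded back in
     * by the final XOR. */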
#define MV_EDGE_CHECK(n)                                                  \
    {                                                                     \
        VP8Macroblock *edge = mb_edge[n];                                 \
        int edge_ref = edge->ref_frame;                                   \
        if (edge_ref != VP56_FRAME_CURRENT) {                             \
            uint32_t mv = AV_RN32A(&edge->mv);                            \
            if (mv) {                                                     \
                if (cur_sign_bias != sign_bias[edge_ref]) {               \
                    /* SWAR negate of the values in mv. */                \
                    mv = ~mv;                                             \
                    mv = ((mv & 0x7fff7fff) +                             \
                          0x00010001) ^ (mv & 0x80008000);                \
                }                                                         \
                if (!n || mv != AV_RN32A(&near_mv[idx]))                  \
                    AV_WN32A(&near_mv[++idx], mv);                        \
                cnt[idx] += 1 + (n != 2);                                 \
            } else                                                        \
                cnt[CNT_ZERO] += 1 + (n != 2);                            \
        }                                                                 \
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] &&
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
            FFSWAP(VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);

                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode   == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode    == VP8_MVMODE_SPLIT)) * 2 +
                                   (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
                } else {
                    mb->mv.y  += vp8_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x  += vp8_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
{
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

    if (layout) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
    if (keyframe) {
        int x, y;
        uint8_t *top;
        uint8_t *const left = s->intra4x4_pred_mode_left;
        if (layout)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                ctx       = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y]   = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
                                           vp8_pred4x4_prob_inter);
    }
}
static av_always_inline
void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds,
                    VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
{
    VP56RangeCoder *c = &s->c;
    static const char * const vp7_feature_name[] = { "q-index",
                                                     "lf-delta",
                                                     "partial-golden-update",
                                                     "blit-pitch" };
    if (is_vp7) {
        int i;
        for (i = 0; i < 4; i++) {
            if (s->feature_enabled[i]) {
                if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
                    int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
                                                 s->feature_index_prob[i]);
                    av_log(s->avctx, AV_LOG_WARNING,
                           "Feature %s present in macroblock (value 0x%x)\n",
                           vp7_feature_name[i], s->feature_value[i][index]);
                }
            }
        }
    } else if (s->segmentation.update_map) {
        int bit  = vp56_rac_get_prob(c, s->prob->segmentid[0]);
        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
    } else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
                                    vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        } else {
            const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
                                           : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
            if (s->mb_layout)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        }

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                vp8_pred8x8c_prob_intra);
        mb->ref_frame        = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame =
                (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
                                                                   : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame - 1]++;

        // motion vectors, 16.3
        if (is_vp7)
            vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
        else
            vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                s->prob->pred8x8c);
        mb->ref_frame        = VP56_FRAME_CURRENT;
        mb->partitioning     = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}
/**
 * @param r     arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i     initial coeff index, 0 unless a separate DC block is coded
 * @param qmul  array holding the dc/ac dequant factor at position 0/1
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                 int i, uint8_t *token_prob, int16_t qmul[2],
                                 const uint8_t scan[16], int vp7)
{
    VP56RangeCoder c = *r;
    goto skip_eob;
    do {
        int coeff;
restart:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            if (vp7)
                goto restart;
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i + 1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else { // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else { // DCT_CAT3 and up
                    int a   = vp56_rac_get_prob(&c, token_prob[8]);
                    int b   = vp56_rac_get_prob(&c, token_prob[9 + a]);
                    int cat = (a << 1) + b;
                    coeff   = 3 + (8 << cat); // CAT3..CAT6 start at 11, 19, 35, 67
                    coeff  += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i + 1][2];
        }
        block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}
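
/**
 * VP7 inter-frame DC prediction (a sketch of the behaviour implemented
 * below): pred[0] holds the current DC predictor and pred[1] counts how many
 * consecutive blocks carried that same DC value. Once the run exceeds 3, the
 * coded DC is treated as a delta against pred[0]; a zero DC, a zero
 * predictor, or a sign change resets the run.
 */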
static av_always_inline
int inter_predict_dc(int16_t block[16], int16_t pred[2])
{
    int16_t dc = block[0];
    int ret = 0;

    if (pred[1] > 3) {
        dc += pred[0];
        ret = 1;
    }

    if (!pred[0] | !dc | ((int32_t) pred[0] ^ (int32_t) dc) >> 31) {
        block[0] = pred[0] = dc;
        pred[1]  = 0;
    } else {
        if (pred[0] == dc)
            pred[1]++;
        block[0] = pred[0] = dc;
    }

    return ret;
}
static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2],
                                            const uint8_t scan[16])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, scan, IS_VP7);
}

#ifndef vp8_decode_block_coeffs_internal
static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, ff_zigzag_scan, IS_VP8);
}
#endif
/**
 * @param c          arithmetic bitstream reader context
 * @param block      destination for block coefficients
 * @param probs      probabilities to use when reading trees from the bitstream
 * @param i          initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul       array holding the dc/ac dequant factor at position 0/1
 * @param scan       scan pattern (VP7 only)
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
                        uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                        int i, int zero_nhood, int16_t qmul[2],
                        const uint8_t scan[16], int vp7)
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
    return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul, scan)
               : vp8_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul);
}
static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
                      VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
                      int is_vp7)
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;
    int block_dc = 0;

    if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
                                  nnz_pred, s->qmat[segment].luma_dc_qmul,
                                  ff_zigzag_scan, is_vp7);
        l_nnz[8] = t_nnz[8] = !!nnz;

        if (is_vp7 && mb->mode > MODE_I4x4) {
            nnz |= inter_predict_dc(td->block_dc,
                                    s->inter_dc_pred[mb->ref_frame - 1]);
        }

        if (nnz) {
            nnz_total += nnz;
            block_dc   = 1;
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
        }
        luma_start = 1;
        luma_ctx   = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x],
                                      s->prob->token[luma_ctx],
                                      luma_start, nnz_pred,
                                      s->qmat[segment].luma_qmul,
                                      s->prob[0].scan, is_vp7);
            /* nnz+block_dc may be one more than the actual last index,
             * but we don't care */
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
                nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
                                          s->prob->token[2], 0, nnz_pred,
                                          s->qmat[segment].chroma_qmul,
                                          s->prob[0].scan, is_vp7);
                td->non_zero_count_cache[i][(y << 1) + x] = nnz;
                t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}
static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
                      uint8_t *src_cb, uint8_t *src_cr,
                      ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
{
    AV_COPY128(top_border, src_y + 15 * linesize);
    if (!simple) {
        AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
        AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
    }
}
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
                    uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
                    int mb_y, int mb_width, int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border - 32;     // for TL prediction
    src_y  -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a, b, xchg)                                                      \
    do {                                                                      \
        if (xchg)                                                             \
            AV_SWAP64(b, a);                                                  \
        else                                                                  \
            AV_COPY64(b, a);                                                  \
    } while (0)

    XCHG(top_border_m1 + 8, src_y - 8, xchg);
    XCHG(top_border, src_y, xchg);
    XCHG(top_border + 8, src_y + 8, 1);
    if (mb_x < mb_width - 1)
        XCHG(top_border + 32, src_y + 16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1 + 16, src_cb - 8, xchg);
        XCHG(top_border_m1 + 24, src_cr - 8, xchg);
        XCHG(top_border + 16, src_cb, 1);
        XCHG(top_border + 24, src_cr, 1);
    }
}
static av_always_inline
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x)
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
    else
        return mb_y ? mode : LEFT_DC_PRED8x8;
}
static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
{
    if (!mb_x)
        return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
    else
        return mb_y ? mode : HOR_PRED8x8;
}
static av_always_inline
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
{
    switch (mode) {
    case DC_PRED8x8:
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    case VERT_PRED8x8:
        return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
    case HOR_PRED8x8:
        return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
    case PLANE_PRED8x8: /* TM */
        return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
    }
    return mode;
}
static av_always_inline
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
{
    if (!mb_x)
        return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
    else
        return mb_y ? mode : HOR_VP8_PRED;
}
static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
                                     int *copy_buf, int vp7)
{
    switch (mode) {
    case VERT_PRED:
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
    case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
                   * as 16x16/8x8 DC */
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
    }
    return mode;
}
static av_always_inline
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
{
    int x, y, mode, nnz;
    uint32_t tr;

    /* for the first row, we need to run xchg_mb_border to init the top edge
     * to 127; otherwise, skip it if we aren't going to deblock */
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        const uint8_t lo = is_vp7 ? 128 : 127;
        const uint8_t hi = is_vp7 ? 128 : 129;
        uint8_t tr_top[4] = { lo, lo, lo, lo };

        // all blocks on the right edge of the macroblock use the bottom edge
        // of the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (mb_y && mb_x == s->mb_width - 1) {
            tr       = tr_right[-1] * 0x01010101u;
            tr_right = (uint8_t *) &tr;
        }

        if (mb->skip)
            AV_ZERO128(td->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0;
                ptrdiff_t linesize = s->linesize;
                uint8_t *dst = ptr + 4 * x;
                LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);

                if ((y == 0 || x == 3) && mb_y == 0) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
                                                        mb_y + y, &copy, is_vp7);
                if (copy) {
                    dst      = copy_dst + 12;
                    linesize = 8;
                    if (!(mb_y + y)) {
                        copy_dst[3] = lo;
                        AV_WN32A(copy_dst + 4, lo * 0x01010101U);
                    } else {
                        AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
                        if (!(mb_x + x))
                            copy_dst[3] = hi;
                        else
                            copy_dst[3] = ptr[4 * x - s->linesize - 1];
                    }
                    if (!(mb_x + x)) {
                        copy_dst[11] =
                        copy_dst[19] =
                        copy_dst[27] =
                        copy_dst[35] = hi;
                    } else {
                        copy_dst[11] = ptr[4 * x                   - 1];
                        copy_dst[19] = ptr[4 * x + s->linesize     - 1];
                        copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
                        copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
                    }
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    AV_COPY32(ptr + 4 * x,                   copy_dst + 12);
                    AV_COPY32(ptr + 4 * x + s->linesize,     copy_dst + 20);
                    AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
                    AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
                }

                nnz = td->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
                                                  td->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr + 4 * x,
                                               td->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr      += 4 * s->linesize;
            intra4x4 += 4;
        }
    }

    mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
                                            mb_x, mb_y, is_vp7);
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
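
/* Reading the table above: an odd subpel phase selects the 4-tap filter and
 * needs 1 extra pixel on the left and 2 on the right (3 total); an even
 * nonzero phase selects the 6-tap filter and needs 2 left and 3 right
 * (5 total). Phase 0 needs no extra pixels at all. */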
/**
 * luma MC function
 *
 * @param s        VP8 decoding context
 * @param dst      target buffer for block data at block position
 * @param ref      reference picture buffer at origin (0, 0)
 * @param mv       motion vector (relative to block position) to get pixel data from
 * @param x_off    horizontal position of block from origin (0, 0)
 * @param y_off    vertical position of block from origin (0, 0)
 * @param block_w  width of block (16, 8 or 4)
 * @param block_h  height of block (always same as block_w)
 * @param width    width of src/dst plane data
 * @param height   height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
                 ThreadFrame *ref, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, ptrdiff_t linesize,
                 vp8_mc_func mc_func[3][3])
{
    uint8_t *src = ref->f->data[0];

    if (AV_RN32A(mv)) {
        ptrdiff_t src_linesize = linesize;

        int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        // edge emulation
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx,
                                     width, height);
            src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            src_linesize = EDGE_EMU_LINESIZE;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
                      linesize, block_h, 0, 0);
    }
}
/**
 * chroma MC function
 *
 * @param s        VP8 decoding context
 * @param dst1     target buffer for block data at block position (U plane)
 * @param dst2     target buffer for block data at block position (V plane)
 * @param ref      reference picture buffer at origin (0, 0)
 * @param mv       motion vector (relative to block position) to get pixel data from
 * @param x_off    horizontal position of block from origin (0, 0)
 * @param y_off    vertical position of block from origin (0, 0)
 * @param block_w  width of block (16, 8 or 4)
 * @param block_h  height of block (always same as block_w)
 * @param width    width of src/dst plane data
 * @param height   height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
                   uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
                   int x_off, int y_off, int block_w, int block_h,
                   int width, int height, ptrdiff_t linesize,
                   vp8_mc_func mc_func[3][3])
{
    uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];

    if (AV_RN32A(mv)) {
        int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
        int my = mv->y & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
        if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src1 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);

            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src2 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}
static av_always_inline
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                 ThreadFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off, int block_w, int block_h,
                 int width, int height, VP56mv *mv)
{
    VP56mv uvmv = *mv;

    /* Y */
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
                ref_frame, mv, x_off + bx_off, y_off + by_off,
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);

    /* U/V */
    if (s->profile == 3) {
        /* this block applies only to VP8; it is safe to check
         * only the profile, as VP7 profile <= 1 */
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    x_off   >>= 1;
    y_off   >>= 1;
    bx_off  >>= 1;
    by_off  >>= 1;
    width   >>= 1;
    height  >>= 1;
    block_w >>= 1;
    block_h >>= 1;
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
}
/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
static av_always_inline
void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                     int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4;
        int mx = (mb->mv.x >> 2) + x_off + 8;
        int my = (mb->mv.y >> 2) + y_off;
        uint8_t **src = s->framep[ref]->tf.f->data;
        int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
        s->vdsp.prefetch(src[0] + off, s->linesize, 4);
        off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
        s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
    }
}
/**
 * Apply motion vectors to prediction buffer, chapter 18.
 */
static av_always_inline
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16 * s->mb_width, height = 16 * s->mb_height;
    ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
        break;
    case VP8_SPLITMVMODE_4x4: {
        int x, y;
        VP56mv uvmv;

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
                            ref, &bmv[4 * y + x],
                            4 * x + x_off, 4 * y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
            }
        }

        /* U/V */
        x_off  >>= 1;
        y_off  >>= 1;
        width  >>= 1;
        height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[2 * y       * 4 + 2 * x    ].x +
                         mb->bmv[2 * y       * 4 + 2 * x + 1].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
                uvmv.y = mb->bmv[2 * y       * 4 + 2 * x    ].y +
                         mb->bmv[2 * y       * 4 + 2 * x + 1].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
                uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
                uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
                if (s->profile == 3) {
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
                vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
                              dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
                              &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
            }
        }
        break;
    }
    case VP8_SPLITMVMODE_16x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x16:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
        break;
    }
}
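
/* idct_mb() below relies on the byte packing of non_zero_count_cache: the
 * four per-block coefficient counts of a row are loaded as one uint32, and
 * nnz4 & ~0x01010101 is nonzero exactly when some block has a count other
 * than 0 or 1, i.e. has AC coefficients and needs a full IDCT instead of the
 * DC-only add path. */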
static av_always_inline
void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
            if (nnz4) {
                if (nnz4 & ~0x01010101) {
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t) nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
                                                      td->block[y][x],
                                                      s->linesize);
                        else if ((uint8_t) nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
                                                   td->block[y][x],
                                                   s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;
                    }
                } else {
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
                }
            }
            y_dst += 4 * s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1 + ch];
            if (nnz4 & ~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t) nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
                                                      td->block[4 + ch][(y << 1) + x],
                                                      s->uvlinesize);
                        else if ((uint8_t) nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
                                                   td->block[4 + ch][(y << 1) + x],
                                                   s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            goto chroma_idct_end;
                    }
                    ch_dst += 4 * s->uvlinesize;
                }
            } else {
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
            }
        }
chroma_idct_end:
        ;
    }
}
static av_always_inline
void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
                         VP8FilterStrength *f, int is_vp7)
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[mb->segment];
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];
        filter_level += s->lf_delta.mode[mb->mode];
    }

    filter_level = av_clip_uintp2(filter_level, 6);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
        interior_limit   = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit  = interior_limit;
    f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
                      mb->mode == VP8_MVMODE_SPLIT;
}
static av_always_inline
void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
               int mb_x, int mb_y, int is_vp7)
{
    int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    ptrdiff_t linesize   = s->linesize;
    ptrdiff_t uvlinesize = s->uvlinesize;
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };

    if (!filter_level)
        return;

    if (is_vp7) {
        bedge_lim_y  = filter_level;
        bedge_lim_uv = filter_level * 2;
        mbedge_lim   = filter_level + 2;
    } else {
        bedge_lim_y  =
        bedge_lim_uv = filter_level * 2 + inner_limit;
        mbedge_lim   = bedge_lim_y + 4;
    }

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

#define H_LOOP_FILTER_16Y_INNER(cond)                                        \
    if (cond && inner_filter) {                                              \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] +  4, linesize,          \
                                             bedge_lim_y, inner_limit,       \
                                             hev_thresh);                    \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] +  8, linesize,          \
                                             bedge_lim_y, inner_limit,       \
                                             hev_thresh);                    \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize,          \
                                             bedge_lim_y, inner_limit,       \
                                             hev_thresh);                    \
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,         \
                                             uvlinesize, bedge_lim_uv,       \
                                             inner_limit, hev_thresh);       \
    }

    H_LOOP_FILTER_16Y_INNER(!is_vp7)

    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);

        if (inner_filter) {
            s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  4 * linesize,
                                                 linesize, bedge_lim_y,
                                                 inner_limit, hev_thresh);
            s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  8 * linesize,
                                                 linesize, bedge_lim_y,
                                                 inner_limit, hev_thresh);
            s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
                                                 linesize, bedge_lim_y,
                                                 inner_limit, hev_thresh);
            s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                                 dst[2] + 4 * uvlinesize,
                                                 uvlinesize, bedge_lim_uv,
                                                 inner_limit, hev_thresh);
        }
    }

    H_LOOP_FILTER_16Y_INNER(is_vp7)
}
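
/* Simple-filter variant: only luma is filtered, with the short edge filter
 * and no high-edge-variance handling; chroma is left untouched. */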
static av_always_inline
void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
                      int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    ptrdiff_t linesize = s->linesize;

    if (!filter_level)
        return;

    bedge_lim  = 2 * filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter_simple(dst +  4, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst +  8, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
    }
    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter_simple(dst +  4 * linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst +  8 * linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
    }
}
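
/* Motion vectors may point up to 16 pixels (16 << 2 in quarter-pel units)
 * outside the visible frame. mv_min/mv_max hold the clamping bounds for the
 * macroblock currently being decoded and slide by 64 units (one 16-pixel
 * macroblock) as decoding advances. */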
#define MARGIN (16 << 2)
static av_always_inline
void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
                             VP8Frame *prev_frame, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int mb_x, mb_y;

    s->mv_bounds.mv_min.y = -MARGIN;
    s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        VP8Macroblock *mb = s->macroblocks_base +
                            ((s->mb_width + 1) * (mb_y + 1) + 1);
        int mb_xy = mb_y * s->mb_width;

        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);

        s->mv_bounds.mv_min.x = -MARGIN;
        s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            if (mb_y == 0)
                AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
                         DC_PRED * 0x01010101);
            decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
            s->mv_bounds.mv_min.x -= 64;
            s->mv_bounds.mv_max.x -= 64;
        }
        s->mv_bounds.mv_min.y -= 64;
        s->mv_bounds.mv_max.y -= 64;
    }
}

static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                                   VP8Frame *prev_frame)
{
    vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
}

static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                                   VP8Frame *prev_frame)
{
    vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
}
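
/* Sliced-threading synchronization: every worker publishes its position as
 * (mb_y << 16) | mb_x in thread_mb_pos. check_thread_pos() blocks until the
 * other worker (otd) has decoded past the given position; update_pos()
 * publishes a new position and wakes any worker whose recorded wait_mb_pos
 * has now been reached. */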
#if HAVE_THREADS
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)                     \
    do {                                                                      \
        int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);                 \
        if (atomic_load(&otd->thread_mb_pos) < tmp) {                         \
            pthread_mutex_lock(&otd->lock);                                   \
            atomic_store(&td->wait_mb_pos, tmp);                              \
            do {                                                              \
                if (atomic_load(&otd->thread_mb_pos) >= tmp)                  \
                    break;                                                    \
                pthread_cond_wait(&otd->cond, &otd->lock);                    \
            } while (1);                                                      \
            atomic_store(&td->wait_mb_pos, INT_MAX);                          \
            pthread_mutex_unlock(&otd->lock);                                 \
        }                                                                     \
    } while (0)

#define update_pos(td, mb_y, mb_x)                                            \
    do {                                                                      \
        int pos              = (mb_y << 16) | (mb_x & 0xFFFF);                \
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
                               (num_jobs > 1);                                \
        int is_null          = !next_td || !prev_td;                          \
        int pos_check        = (is_null) ? 1                                  \
                                         : (next_td != td &&                  \
                                            pos >= atomic_load(&next_td->wait_mb_pos)) || \
                                           (prev_td != td &&                  \
                                            pos >= atomic_load(&prev_td->wait_mb_pos));   \
        atomic_store(&td->thread_mb_pos, pos);                                \
        if (sliced_threading && pos_check) {                                  \
            pthread_mutex_lock(&td->lock);                                    \
            pthread_cond_broadcast(&td->cond);                                \
            pthread_mutex_unlock(&td->lock);                                  \
        }                                                                     \
    } while (0)
#else
#define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0)
#define update_pos(td, mb_y, mb_x) while(0)
#endif
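
/* Decode one row of macroblocks (modes, coefficients, intra/inter
 * prediction and the inverse transform) without applying the loop filter;
 * the per-macroblock filter strengths are only computed and stored so that
 * filter_mb_row() can run as a separate pass. */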
static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                                    int jobnr, int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
    int mb_y = atomic_load(&td->thread_mb_pos) >> 16;
    int mb_x, mb_xy = mb_y * s->mb_width;
    int num_jobs = s->num_jobs;
    VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
    VP56RangeCoder *c  = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
        curframe->tf.f->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->tf.f->data[2] +  8 * mb_y * s->uvlinesize
    };

    if (c->end <= c->buffer && c->bits >= 0)
        return AVERROR_INVALIDDATA;

    if (mb_y == 0)
        prev_td = td;
    else
        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = td;
    else
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];
    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    else {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
    }

    if (!is_vp7 || mb_y == 0)
        memset(td->left_nnz, 0, sizeof(td->left_nnz));

    td->mv_bounds.mv_min.x = -MARGIN;
    td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        if (c->end <= c->buffer && c->bits >= 0)
            return AVERROR_INVALIDDATA;
        // Wait for previous thread to read mb_x+2, and reach mb_y-1.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td,
                                 mb_x + (is_vp7 ? 2 : 1),
                                 mb_y - (is_vp7 ? 2 : 1));
            } else {
                check_thread_pos(td, prev_td,
                                 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
                                 mb_y - (is_vp7 ? 2 : 1));
            }
        }

        s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
                         s->linesize, 4);
        s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
                         dst[2] - dst[1], 2);

        if (!s->mb_layout)
            decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (!mb->skip) {
            idct_mb(s, td, dst, mb);
        } else {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned

            /* Reset DC block predictors if they would exist
             * if the mb had coefficients */
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8]     = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);

        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        td->mv_bounds.mv_min.x -= 64;
        td->mv_bounds.mv_max.x -= 64;

        if (mb_x == s->mb_width + 1) {
            update_pos(td, mb_y, s->mb_width + 3);
        } else {
            update_pos(td, mb_y, mb_x);
        }
    }
    return 0;
}

static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                       int jobnr, int threadnr)
{
    return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
}

static int vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                       int jobnr, int threadnr)
{
    return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
}
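
/* Deblocking pass over one macroblock row: save the unfiltered bottom edge
 * of each macroblock first (the row below predicts from unfiltered pixels),
 * then apply the simple or normal loop filter. */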
static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
                                           int jobnr, int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    int mb_x, mb_y = atomic_load(&td->thread_mb_pos) >> 16, num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe->tf.f;
    VP8Macroblock *mb;
    VP8ThreadData *prev_td, *next_td;
    uint8_t *dst[3] = {
        curframe->data[0] + 16 * mb_y * s->linesize,
        curframe->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->data[2] +  8 * mb_y * s->uvlinesize
    };

    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    else
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;

    if (mb_y == 0)
        prev_td = td;
    else
        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = td;
    else
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];
        if (prev_td != td)
            check_thread_pos(td, prev_td,
                             (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
        if (next_td != td)
            if (next_td != &s->thread_data[0])
                check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);

        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        else
            filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
    }
}

static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
}

static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
}
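
/* Slice-thread worker: with num_jobs jobs, job n handles macroblock rows n,
 * n + num_jobs, n + 2 * num_jobs, ..., filtering each row right after
 * decoding it when deblocking is enabled. Under frame threading, progress
 * is reported per row so the thread decoding the next frame can follow. */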
static av_always_inline
int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
                              int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[jobnr];
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
    VP8Frame *curframe = s->curframe;
    int mb_y, num_jobs = s->num_jobs;
    int ret;

    td->thread_nr = threadnr;
    td->mv_bounds.mv_min.y = -MARGIN - 64 * threadnr;
    td->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr;
    for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
        atomic_store(&td->thread_mb_pos, mb_y << 16);
        ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
        if (ret < 0) {
            update_pos(td, s->mb_height, INT_MAX & 0xFFFF);
            return ret;
        }
        if (s->deblock_filter)
            s->filter_mb_row(avctx, tdata, jobnr, threadnr);
        update_pos(td, mb_y, INT_MAX & 0xFFFF);

        td->mv_bounds.mv_min.y -= 64 * num_jobs;
        td->mv_bounds.mv_max.y -= 64 * num_jobs;

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            ff_thread_report_progress(&curframe->tf, mb_y, 0);
    }

    return 0;
}

static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
}

static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
}
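
/* Common frame-level entry point for VP7 and VP8: parse the frame header,
 * honor the skip_frame setting, rotate the previous/golden/altref
 * references, decode all macroblock rows (through execute2() when slice
 * threading), and output the frame unless the bitstream marks it
 * invisible. */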
static av_always_inline
int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                      AVPacket *avpkt, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int ret, i, referenced, num_jobs;
    enum AVDiscard skip_thresh;
    VP8Frame *av_uninit(curframe), *prev_frame;

    av_assert0(avctx->pix_fmt == AV_PIX_FMT_YUVA420P || avctx->pix_fmt == AV_PIX_FMT_YUV420P);

    if (is_vp7)
        ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
    else
        ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);

    if (ret < 0)
        goto err;

    prev_frame = s->framep[VP56_FRAME_CURRENT];

    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
                 s->update_altref == VP56_FRAME_CURRENT;

    skip_thresh = !referenced ? AVDISCARD_NONREF
                              : !s->keyframe ? AVDISCARD_NONKEY
                                             : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
        goto skip_decode;
    }
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // release no longer referenced frames
    for (i = 0; i < 5; i++)
        if (s->frames[i].tf.f->data[0] &&
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            vp8_release_frame(s, &s->frames[i]);

    curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);

    if (!s->colorspace)
        avctx->colorspace = AVCOL_SPC_BT470BG;
    if (s->fullrange)
        avctx->color_range = AVCOL_RANGE_JPEG;
    else
        avctx->color_range = AVCOL_RANGE_MPEG;

    /* Given that arithmetic probabilities are updated every frame, it's quite
     * likely that the values we have on a random interframe are complete
     * junk if we didn't start decode on a keyframe. So just don't display
     * anything rather than junk. */
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN]   ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING,
               "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
        goto err;
    }

    curframe->tf.f->key_frame = s->keyframe;
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
                                            : AV_PICTURE_TYPE_P;
    if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
        goto err;

    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE)
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
    else
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];

    if (s->update_golden != VP56_FRAME_NONE)
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
    else
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];

    if (s->update_last)
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    else
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];

    s->next_framep[VP56_FRAME_CURRENT] = curframe;

    if (avctx->codec->update_thread_context)
        ff_thread_finish_setup(avctx);

    s->linesize   = curframe->tf.f->linesize[0];
    s->uvlinesize = curframe->tf.f->linesize[1];

    memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
    /* Zero macroblock structures for top/top-left prediction
     * from outside the frame. */
    if (!s->mb_layout)
        memset(s->macroblocks + s->mb_height * 2 - 1, 0,
               (s->mb_width + 1) * sizeof(*s->macroblocks));
    if (!s->mb_layout && s->keyframe)
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);

    memset(s->ref_count, 0, sizeof(s->ref_count));

    if (s->mb_layout == 1) {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, 1, 0);
        if (is_vp7)
            vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
        else
            vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
    }

    if (avctx->active_thread_type == FF_THREAD_FRAME)
        num_jobs = 1;
    else
        num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
    s->num_jobs   = num_jobs;
    s->curframe   = curframe;
    s->prev_frame = prev_frame;
    s->mv_bounds.mv_min.y = -MARGIN;
    s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (i = 0; i < MAX_THREADS; i++) {
        VP8ThreadData *td = &s->thread_data[i];
        atomic_init(&td->thread_mb_pos, 0);
        atomic_init(&td->wait_mb_pos, INT_MAX);
    }
    if (is_vp7)
        avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
                        num_jobs);
    else
        avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
                        num_jobs);

    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
        if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
            return ret;
        *got_frame = 1;
    }

    return avpkt->size;
err:
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
    return ret;
}

int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                        AVPacket *avpkt)
{
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
}

#if CONFIG_VP7_DECODER
static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                            AVPacket *avpkt)
{
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */

av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int i;

    if (!s)
        return 0;

    vp8_decode_flush_impl(avctx, 1);
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        av_frame_free(&s->frames[i].tf.f);

    return 0;
}

static av_cold int vp8_init_frames(VP8Context *s)
{
    int i;
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
        s->frames[i].tf.f = av_frame_alloc();
        if (!s->frames[i].tf.f)
            return AVERROR(ENOMEM);
    }
    return 0;
}

static av_always_inline
int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;
    s->vp7   = avctx->codec->id == AV_CODEC_ID_VP7;
    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
    avctx->internal->allocate_progress = 1;

    ff_videodsp_init(&s->vdsp, 8);

    ff_vp78dsp_init(&s->vp8dsp);
    if (CONFIG_VP7_DECODER && is_vp7) {
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
        ff_vp7dsp_init(&s->vp8dsp);
        s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
        s->filter_mb_row           = vp7_filter_mb_row;
    } else if (CONFIG_VP8_DECODER && !is_vp7) {
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
        ff_vp8dsp_init(&s->vp8dsp);
        s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
        s->filter_mb_row           = vp8_filter_mb_row;
    }

    /* does not change for VP8 */
    memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));

    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
        return ret;
    }

    return 0;
}

#if CONFIG_VP7_DECODER
static int vp7_decode_init(AVCodecContext *avctx)
{
    return vp78_decode_init(avctx, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */

av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
{
    return vp78_decode_init(avctx, IS_VP8);
}

#if CONFIG_VP8_DECODER
#if HAVE_THREADS
static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;

    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
        return ret;
    }

    return 0;
}
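
/* Translate a VP8Frame pointer from the source thread's context into the
 * corresponding entry of this context's own frames[] array. */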
#define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)

static int vp8_decode_update_thread_context(AVCodecContext *dst,
                                            const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
    int i;

    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        free_buffers(s);
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta     = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
        if (s_src->frames[i].tf.f->data[0]) {
            int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
            if (ret < 0)
                return ret;
        }
    }

    s->framep[0] = REBASE(s_src->next_framep[0]);
    s->framep[1] = REBASE(s_src->next_framep[1]);
    s->framep[2] = REBASE(s_src->next_framep[2]);
    s->framep[3] = REBASE(s_src->next_framep[3]);

    return 0;
}
#endif /* HAVE_THREADS */
#endif /* CONFIG_VP8_DECODER */

#if CONFIG_VP7_DECODER
AVCodec ff_vp7_decoder = {
    .name           = "vp7",
    .long_name      = NULL_IF_CONFIG_SMALL("On2 VP7"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_VP7,
    .priv_data_size = sizeof(VP8Context),
    .init           = vp7_decode_init,
    .close          = ff_vp8_decode_free,
    .decode         = vp7_decode_frame,
    .capabilities   = AV_CODEC_CAP_DR1,
    .flush          = vp8_decode_flush,
};
#endif /* CONFIG_VP7_DECODER */

#if CONFIG_VP8_DECODER
AVCodec ff_vp8_decoder = {
    .name                  = "vp8",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP8,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = ff_vp8_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = ff_vp8_decode_frame,
    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
                             AV_CODEC_CAP_SLICE_THREADS,
    .flush                 = vp8_decode_flush,
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
};
#endif /* CONFIG_VP8_DECODER */