+/**
+ * Free the segmentation maps queued in s->segmentation_maps.
+ *
+ * @param s        decoder context owning the queue
+ * @param is_close nonzero when called from codec close: free everything.
+ *                 Otherwise one map is left behind for reuse, unless the
+ *                 queued maps have been flagged invalid (maps_are_invalid),
+ *                 in which case all of them are released.
+ *
+ * Always clears the maps_are_invalid flag on return.
+ */
+static void release_queued_segmaps(VP8Context *s, int is_close)
+{
+ int leave_behind = is_close ? 0 : !s->maps_are_invalid;
+ while (s->num_maps_to_be_freed > leave_behind)
+ av_freep(&s->segmentation_maps[--s->num_maps_to_be_freed]);
+ s->maps_are_invalid = 0;
+}
+
+/* MV clamping margin around the frame edge, in quarter-pel units
+ * (16 pixels << 2 == 64). The per-MB window below shifts by 64 (one
+ * 16-pixel macroblock in qpel) per macroblock. */
+#define MARGIN (16 << 2)
+/**
+ * Decode macroblock modes and motion vectors for the entire frame in a
+ * single pass, before reconstruction starts.
+ *
+ * Writes per-MB reference indices into curframe->ref_index[0]; the
+ * previous frame's ref_index (when present) is passed to decode_mb_mode,
+ * presumably for temporal mode/MV prediction — confirm against
+ * decode_mb_mode's definition.
+ */
+static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, AVFrame *curframe,
+ AVFrame *prev_frame)
+{
+ VP8Context *s = avctx->priv_data;
+ int mb_x, mb_y;
+
+ /* Vertical MV clamp window for row 0; shifted down 64 qpel per row. */
+ s->mv_min.y = -MARGIN;
+ s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
+ for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
+ /* +1 row / +1 column skips the edge MBs stored in macroblocks_base. */
+ VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
+ int mb_xy = mb_y*s->mb_width;
+
+ /* Reset left-neighbour intra prediction modes to DC at row start. */
+ AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
+
+ /* Horizontal MV clamp window for column 0; shifted 64 qpel per MB. */
+ s->mv_min.x = -MARGIN;
+ s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
+ for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
+ if (mb_y == 0)
+ /* First row: seed the top-neighbour (edge) MBs with DC_PRED. */
+ AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
+ decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
+ prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 1);
+ s->mv_min.x -= 64;
+ s->mv_max.x -= 64;
+ }
+ s->mv_min.y -= 64;
+ s->mv_max.y -= 64;
+ }
+}
+
+#if HAVE_THREADS
+/* Block until the other row thread (otd) has progressed at least to
+ * (mb_y_check, mb_x_check). Progress is published as a single int,
+ * packed (mb_y << 16) | mb_x, so positions compare with one integer
+ * comparison. td->wait_mb_pos advertises what this thread is waiting
+ * for (INT_MAX == not waiting), which update_pos() uses to decide
+ * whether a broadcast is needed.
+ * NOTE(review): the fast-path read of otd->thread_mb_pos happens
+ * outside otd->lock — presumably acceptable because the re-check is
+ * done under the lock before waiting; confirm the intended memory-
+ * ordering assumptions. */
+#define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
+ do {\
+ int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);\
+ if (otd->thread_mb_pos < tmp) {\
+ pthread_mutex_lock(&otd->lock);\
+ td->wait_mb_pos = tmp;\
+ do {\
+ if (otd->thread_mb_pos >= tmp)\
+ break;\
+ pthread_cond_wait(&otd->cond, &otd->lock);\
+ } while (1);\
+ td->wait_mb_pos = INT_MAX;\
+ pthread_mutex_unlock(&otd->lock);\
+ }\
+ } while(0);
+
+/* Publish this thread's progress as packed (mb_y << 16) | mb_x and wake
+ * any neighbour thread that check_thread_pos() shows is waiting for a
+ * position we have now reached. The broadcast is skipped entirely when
+ * not slice-threading or when only one job is running.
+ * NOTE(review): relies on num_jobs/next_td/prev_td being in scope at
+ * every expansion site, and both macros end in "} while(0);" — the
+ * trailing semicolon makes them unsafe in an unbraced if/else; worth
+ * dropping it. */
+#define update_pos(td, mb_y, mb_x)\
+ do {\
+ int pos = (mb_y << 16) | (mb_x & 0xFFFF);\
+ int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
+ int is_null = (next_td == NULL) || (prev_td == NULL);\
+ int pos_check = (is_null) ? 1 :\
+ (next_td != td && pos >= next_td->wait_mb_pos) ||\
+ (prev_td != td && pos >= prev_td->wait_mb_pos);\
+ td->thread_mb_pos = pos;\
+ if (sliced_threading && pos_check) {\
+ pthread_mutex_lock(&td->lock);\
+ pthread_cond_broadcast(&td->cond);\
+ pthread_mutex_unlock(&td->lock);\
+ }\
+ } while(0);
+#else
+/* Single-threaded build: synchronization is a no-op. */
+#define check_thread_pos(td, otd, mb_x_check, mb_y_check)
+#define update_pos(td, mb_y, mb_x)
+#endif
+
+/**
+ * Decode and reconstruct one macroblock row (mode parsing when needed,
+ * coefficient decode, intra/inter prediction, IDCT) WITHOUT applying the
+ * loop filter — filtering is done by a separate pass/function.
+ *
+ * Runs as a slice-threading job: the row index is taken from
+ * td->thread_mb_pos (high 16 bits), and progress against the threads
+ * working on the rows above/below is coordinated with
+ * check_thread_pos()/update_pos().
+ */
+static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
+ int jobnr, int threadnr)
+{
+ VP8Context *s = avctx->priv_data;
+ VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
+ int mb_y = td->thread_mb_pos>>16;
+ int i, y, mb_x, mb_xy = mb_y*s->mb_width;
+ int num_jobs = s->num_jobs;
+ AVFrame *curframe = s->curframe, *prev_frame = s->prev_frame;
+ /* Coefficient partitions cycle per row; num_coeff_partitions is
+ * presumably a power of two for the mask to work — confirm. */
+ VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
+ VP8Macroblock *mb;
+ /* Destination pointers for the start of this row: luma + 2 chroma. */
+ uint8_t *dst[3] = {
+ curframe->data[0] + 16*mb_y*s->linesize,
+ curframe->data[1] + 8*mb_y*s->uvlinesize,
+ curframe->data[2] + 8*mb_y*s->uvlinesize
+ };
+ /* Neighbouring-row thread data; self at frame edges so the sync
+ * macros degenerate to no-ops there. */
+ if (mb_y == 0) prev_td = td;
+ else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
+ if (mb_y == s->mb_height-1) next_td = td;
+ else next_td = &s->thread_data[(jobnr + 1)%num_jobs];
+ if (s->mb_layout == 1)
+ /* One-pass layout: modes/MVs were decoded up front; index into the
+ * full per-frame array (skipping the edge row/column). */
+ mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
+ else {
+ /* Two-row ring layout: modes are decoded inline below. */
+ mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
+ memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
+ AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
+ }
+
+ memset(td->left_nnz, 0, sizeof(td->left_nnz));
+ // left edge of 129 for intra prediction
+ if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
+ for (i = 0; i < 3; i++)
+ /* 16 rows for luma (i==0), 8 for each chroma plane (!!i). */
+ for (y = 0; y < 16>>!!i; y++)
+ dst[i][y*curframe->linesize[i]-1] = 129;
+ if (mb_y == 1) {
+ /* Top-left corner fixup of the saved border after row 0. */
+ s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
+ }
+ }
+
+ /* Horizontal MV clamp window, shifted 64 qpel per macroblock. */
+ s->mv_min.x = -MARGIN;
+ s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
+
+ for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
+ // Wait for previous thread to read mb_x+2, and reach mb_y-1.
+ if (prev_td != td) {
+ if (threadnr != 0) {
+ check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
+ } else {
+ /* Thread 0's predecessor also runs the filter pass, which
+ * publishes positions offset by mb_width+3 — see update_pos
+ * at the bottom of this loop. */
+ check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
+ }
+ }
+
+ s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
+ s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
+
+ if (!s->mb_layout)
+ /* Inline mode/MV decode (two-row layout only). */
+ decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
+ prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 0);
+
+ prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
+
+ if (!mb->skip)
+ decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);
+
+ if (mb->mode <= MODE_I4x4)
+ intra_predict(s, td, dst, mb, mb_x, mb_y);
+ else
+ inter_predict(s, td, dst, mb, mb_x, mb_y);
+
+ prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
+
+ if (!mb->skip) {
+ idct_mb(s, td, dst, mb);
+ } else {
+ /* Skipped MB: clear non-zero-coefficient counts for neighbours. */
+ AV_ZERO64(td->left_nnz);
+ AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
+
+ // Reset DC block predictors if they would exist if the mb had coefficients
+ if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
+ td->left_nnz[8] = 0;
+ s->top_nnz[mb_x][8] = 0;
+ }
+ }
+
+ if (s->deblock_filter)
+ filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);
+
+ /* Last job in the rotation: save the unfiltered bottom border now,
+ * since the next row (wrapping to this thread) will overwrite it
+ * before the filter pass runs. */
+ if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
+ if (s->filter.simple)
+ backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
+ else
+ backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
+ }
+
+ prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
+
+ dst[0] += 16;
+ dst[1] += 8;
+ dst[2] += 8;
+ s->mv_min.x -= 64;
+ s->mv_max.x -= 64;
+
+ /* NOTE(review): mb_x == s->mb_width+1 looks unreachable here — the
+ * loop bound is mb_x < s->mb_width, so the first branch can never
+ * be taken. Confirm whether this was meant for the filter-row
+ * function (which uses the mb_width+3 offset) or is dead code. */
+ if (mb_x == s->mb_width+1) {
+ update_pos(td, mb_y, s->mb_width+3);
+ } else {
+ update_pos(td, mb_y, mb_x);
+ }
+ }
+}
+
+static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
+ int jobnr, int threadnr)