+#if HAVE_THREADS
+/**
+ * Decode one tile column of the current frame, across all tile rows.
+ *
+ * jobnr selects both the per-job VP9TileData (s->td[jobnr]) and the tile
+ * column whose bounds are obtained from set_tile_offset(). After every row
+ * of 64x64 superblocks, the last pixel line of that row is copied into
+ * s->intra_pred_data[] (skipped when no further row follows) and
+ * vp9_report_tile_progress() is called for that superblock row.
+ *
+ * @param avctx    codec context; priv_data is used as the VP9Context
+ * @param tdata    unused
+ * @param jobnr    job index, used as tile column index and index into s->td
+ * @param threadnr unused
+ * @return always 0
+ */
+static av_always_inline
+int decode_tiles_mt(AVCodecContext *avctx, void *tdata, int jobnr,
+                    int threadnr)
+{
+    VP9Context *s = avctx->priv_data;
+    VP9TileData *td = &s->td[jobnr];
+    ptrdiff_t uvoff, yoff, ls_y, ls_uv;
+    int bytesperpixel = s->bytesperpixel, row, col, tile_row;
+    unsigned tile_cols_len;
+    int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
+    VP9Filter *lflvl_ptr_base;
+    AVFrame *f;
+
+    f = s->s.frames[CUR_FRAME].tf.f;
+    ls_y = f->linesize[0];
+    ls_uv =f->linesize[1];
+
+    // Column bounds of this job's tile; row/col advance by 8 per 64-pixel
+    // superblock, hence the ">> 3" when converting to superblock indices.
+    set_tile_offset(&tile_col_start, &tile_col_end,
+                    jobnr, s->s.h.tiling.log2_tile_cols, s->sb_cols);
+    td->tile_col_start = tile_col_start;
+    // Byte offsets of the tile's first superblock within the first row of
+    // the luma / chroma planes, and the matching loop filter entry.
+    uvoff = (64 * bytesperpixel >> s->ss_h)*(tile_col_start >> 3);
+    yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
+    lflvl_ptr_base = s->lflvl+(tile_col_start >> 3);
+
+    for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
+        set_tile_offset(&tile_row_start, &tile_row_end,
+                        tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
+
+        // Each tile row has its own entry in td->c_b.
+        td->c = &td->c_b[tile_row];
+        for (row = tile_row_start; row < tile_row_end;
+             row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
+            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
+            VP9Filter *lflvl_ptr = lflvl_ptr_base+s->sb_cols*(row >> 3);
+
+            // Reset the left-edge contexts at the start of every superblock
+            // row of the tile.
+            memset(td->left_partition_ctx, 0, 8);
+            memset(td->left_skip_ctx, 0, 8);
+            // The mode context is reset over its full 16 bytes in both
+            // cases; only the fill value depends on the frame type.
+            if (s->s.h.keyframe || s->s.h.intraonly) {
+                memset(td->left_mode_ctx, DC_PRED, 16);
+            } else {
+                memset(td->left_mode_ctx, NEARESTMV, 16);
+            }
+            memset(td->left_y_nnz_ctx, 0, 16);
+            memset(td->left_uv_nnz_ctx, 0, 32);
+            memset(td->left_segpred_ctx, 0, 8);
+
+            for (col = tile_col_start;
+                 col < tile_col_end;
+                 col += 8, yoff2 += 64 * bytesperpixel,
+                 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
+                // FIXME integrate with lf code (i.e. zero after each
+                // use, similar to invtxfm coefficients, or similar)
+                memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
+                decode_sb(td, row, col, lflvl_ptr,
+                          yoff2, uvoff2, BL_64X64);
+            }
+
+            // backup pre-loopfilter reconstruction data for intra
+            // prediction of next row of sb64s
+            tile_cols_len = tile_col_end - tile_col_start;
+            if (row + 8 < s->rows) {
+                memcpy(s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
+                       f->data[0] + yoff + 63 * ls_y,
+                       8 * tile_cols_len * bytesperpixel);
+                memcpy(s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
+                       f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
+                       8 * tile_cols_len * bytesperpixel >> s->ss_h);
+                memcpy(s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
+                       f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
+                       8 * tile_cols_len * bytesperpixel >> s->ss_h);
+            }
+
+            // Report this superblock row of the tile column as done;
+            // loopfilter_proc() waits on this via vp9_await_tile_progress().
+            vp9_report_tile_progress(s, row >> 3, 1);
+        }
+    }
+    return 0;
+}
+
+/**
+ * Loop-filter the current frame one superblock row at a time.
+ *
+ * For each of the s->sb_rows rows, first waits through
+ * vp9_await_tile_progress() with a count of s->s.h.tiling.tile_cols, then,
+ * if s->s.h.filter.level is non-zero, calls ff_vp9_loopfilter_sb() for every
+ * 8-column step across the row.
+ *
+ * @param avctx codec context; priv_data is used as the VP9Context
+ * @return always 0
+ */
+static av_always_inline
+int loopfilter_proc(AVCodecContext *avctx)
+{
+    VP9Context *s = avctx->priv_data;
+    AVFrame *frame = s->s.frames[CUR_FRAME].tf.f;
+    ptrdiff_t stride_y  = frame->linesize[0];
+    ptrdiff_t stride_uv = frame->linesize[1];
+    int bytesperpixel = s->bytesperpixel;
+    int sb_row;
+
+    for (sb_row = 0; sb_row < s->sb_rows; sb_row++) {
+        ptrdiff_t y_off, uv_off;
+        VP9Filter *lflvl;
+        int col;
+
+        // The wait happens for every row, even when filtering is disabled.
+        vp9_await_tile_progress(s, sb_row, s->s.h.tiling.tile_cols);
+
+        if (!s->s.h.filter.level)
+            continue;
+
+        // Byte offsets of the first superblock of this row in the luma and
+        // chroma planes, and the row's first loop filter entry.
+        y_off  = (stride_y * 64) * sb_row;
+        uv_off = (stride_uv * 64 >> s->ss_v) * sb_row;
+        lflvl  = s->lflvl + s->sb_cols * sb_row;
+
+        col = 0;
+        while (col < s->cols) {
+            ff_vp9_loopfilter_sb(avctx, lflvl, sb_row << 3, col,
+                                 y_off, uv_off);
+            col    += 8;
+            y_off  += 64 * bytesperpixel;
+            uv_off += 64 * bytesperpixel >> s->ss_h;
+            lflvl++;
+        }
+    }
+    return 0;
+}
+#endif