]> git.sesse.net Git - ffmpeg/blobdiff - libavcodec/vc1_block.c
avcodec/vc1: remove unused overlap smooting and loop filter
[ffmpeg] / libavcodec / vc1_block.c
index f9f26f7e4290d8c800a36d8fd8699c2ef1dea46e..e78e4806bd1f5d3e719f690f211b6981c6bf4741 100644 (file)
@@ -69,73 +69,74 @@ static inline void init_block_index(VC1Context *v)
 static void vc1_put_signed_blocks_clamped(VC1Context *v)
 {
     MpegEncContext *s = &v->s;
-    int topleft_mb_pos, top_mb_pos;
-    int stride_y, fieldtx = 0;
-    int v_dist;
-
-    /* The put pixels loop is always one MB row behind the decoding loop,
-     * because we can only put pixels when overlap filtering is done, and
-     * for filtering of the bottom edge of a MB, we need the next MB row
-     * present as well.
-     * Within the row, the put pixels loop is also one MB col behind the
-     * decoding loop. The reason for this is again, because for filtering
-     * of the right MB edge, we need the next MB present. */
-    if (!s->first_slice_line) {
+    uint8_t *dest;
+    int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
+    int fieldtx = 0;
+    int i;
+
+    /* The put pixels loop is one MB row and one MB column behind the decoding
+     * loop because we can only put pixels when overlap filtering is done. For
+     * interlaced frame pictures, however, the put pixels loop is only one
+     * column behind the decoding loop as interlaced frame pictures only need
+     * horizontal overlap filtering. */
+    if (!s->first_slice_line && v->fcm != ILACE_FRAME) {
+        if (s->mb_x) {
+            for (i = 0; i < block_count; i++) {
+                if (i > 3 ? v->mb_type[0][s->block_index[i] - s->block_wrap[i] - 1] :
+                            v->mb_type[0][s->block_index[i] - 2 * s->block_wrap[i] - 2]) {
+                    dest = s->dest[0] + ((i & 2) - 4) * 4 * s->linesize + ((i & 1) - 2) * 8;
+                    s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][i],
+                                                      i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest,
+                                                      i > 3 ? s->uvlinesize : s->linesize);
+                }
+            }
+        }
+        if (s->mb_x == s->mb_width - 1) {
+            for (i = 0; i < block_count; i++) {
+                if (i > 3 ? v->mb_type[0][s->block_index[i] - s->block_wrap[i]] :
+                            v->mb_type[0][s->block_index[i] - 2 * s->block_wrap[i]]) {
+                    dest = s->dest[0] + ((i & 2) - 4) * 4 * s->linesize + (i & 1) * 8;
+                    s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][i],
+                                                      i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest,
+                                                      i > 3 ? s->uvlinesize : s->linesize);
+                }
+            }
+        }
+    }
+    if (s->mb_y == s->end_mb_y - 1 || v->fcm == ILACE_FRAME) {
         if (s->mb_x) {
-            topleft_mb_pos = (s->mb_y - 1) * s->mb_stride + s->mb_x - 1;
             if (v->fcm == ILACE_FRAME)
-                fieldtx = v->fieldtx_plane[topleft_mb_pos];
-            stride_y       = s->linesize << fieldtx;
-            v_dist         = (16 - fieldtx) >> (fieldtx == 0);
-            s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][0],
-                                              s->dest[0] - 16 * s->linesize - 16,
-                                              stride_y);
-            s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][1],
-                                              s->dest[0] - 16 * s->linesize - 8,
-                                              stride_y);
-            s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][2],
-                                              s->dest[0] - v_dist * s->linesize - 16,
-                                              stride_y);
-            s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][3],
-                                              s->dest[0] - v_dist * s->linesize - 8,
-                                              stride_y);
-            if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
-            s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][4],
-                                              s->dest[1] - 8 * s->uvlinesize - 8,
-                                              s->uvlinesize);
-            s->idsp.put_signed_pixels_clamped(v->block[v->topleft_blk_idx][5],
-                                              s->dest[2] - 8 * s->uvlinesize - 8,
-                                              s->uvlinesize);
+                fieldtx = v->fieldtx_plane[s->mb_y * s->mb_stride + s->mb_x - 1];
+            for (i = 0; i < block_count; i++) {
+                if (i > 3 ? v->mb_type[0][s->block_index[i] - 1] :
+                            v->mb_type[0][s->block_index[i] - 2]) {
+                    if (fieldtx)
+                        dest = s->dest[0] + ((i & 2) >> 1) * s->linesize + ((i & 1) - 2) * 8;
+                    else
+                        dest = s->dest[0] + (i & 2) * 4 * s->linesize + ((i & 1) - 2) * 8;
+                    s->idsp.put_signed_pixels_clamped(v->block[v->left_blk_idx][i],
+                                                      i > 3 ? s->dest[i - 3] - 8 : dest,
+                                                      i > 3 ? s->uvlinesize : s->linesize << fieldtx);
+                }
             }
         }
         if (s->mb_x == s->mb_width - 1) {
-            top_mb_pos = (s->mb_y - 1) * s->mb_stride + s->mb_x;
             if (v->fcm == ILACE_FRAME)
-                fieldtx = v->fieldtx_plane[top_mb_pos];
-            stride_y   = s->linesize << fieldtx;
-            v_dist     = fieldtx ? 15 : 8;
-            s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][0],
-                                              s->dest[0] - 16 * s->linesize,
-                                              stride_y);
-            s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][1],
-                                              s->dest[0] - 16 * s->linesize + 8,
-                                              stride_y);
-            s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][2],
-                                              s->dest[0] - v_dist * s->linesize,
-                                              stride_y);
-            s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][3],
-                                              s->dest[0] - v_dist * s->linesize + 8,
-                                              stride_y);
-            if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
-            s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][4],
-                                              s->dest[1] - 8 * s->uvlinesize,
-                                              s->uvlinesize);
-            s->idsp.put_signed_pixels_clamped(v->block[v->top_blk_idx][5],
-                                              s->dest[2] - 8 * s->uvlinesize,
-                                              s->uvlinesize);
+                fieldtx = v->fieldtx_plane[s->mb_y * s->mb_stride + s->mb_x];
+            for (i = 0; i < block_count; i++) {
+                if (v->mb_type[0][s->block_index[i]]) {
+                    if (fieldtx)
+                        dest = s->dest[0] + ((i & 2) >> 1) * s->linesize + (i & 1) * 8;
+                    else
+                        dest = s->dest[0] + (i & 2) * 4 * s->linesize + (i & 1) * 8;
+                    s->idsp.put_signed_pixels_clamped(v->block[v->cur_blk_idx][i],
+                                                      i > 3 ? s->dest[i - 3] : dest,
+                                                      i > 3 ? s->uvlinesize : s->linesize << fieldtx);
+                }
             }
         }
     }
+}
 
 #define inc_blk_idx(idx) do { \
         idx++; \
@@ -143,12 +144,6 @@ static void vc1_put_signed_blocks_clamped(VC1Context *v)
             idx = 0; \
     } while (0)
 
-    inc_blk_idx(v->topleft_blk_idx);
-    inc_blk_idx(v->top_blk_idx);
-    inc_blk_idx(v->left_blk_idx);
-    inc_blk_idx(v->cur_blk_idx);
-}
-
 /***********************************************************************/
 /**
  * @name VC-1 Block-level functions
@@ -594,7 +589,7 @@ static int vc1_decode_i_block(VC1Context *v, int16_t block[64], int n,
         scale = s->c_dc_scale;
     block[0] = dcdiff * scale;
 
-    ac_val  = s->ac_val[0][0] + s->block_index[n] * 16;
+    ac_val  = s->ac_val[0][s->block_index[n]];
     ac_val2 = ac_val;
     if (dc_pred_dir) // left
         ac_val -= 16;
@@ -745,7 +740,7 @@ static int vc1_decode_i_block_adv(VC1Context *v, int16_t block[64], int n,
 
     scale = mquant * 2 + ((mquant == v->pq) ? v->halfpq : 0);
 
-    ac_val  = s->ac_val[0][0] + s->block_index[n] * 16;
+    ac_val  = s->ac_val[0][s->block_index[n]];
     ac_val2 = ac_val;
     if (dc_pred_dir) // left
         ac_val -= 16;
@@ -946,7 +941,7 @@ static int vc1_decode_intra_block(VC1Context *v, int16_t block[64], int n,
     if (!a_avail) dc_pred_dir = 1;
     if (!c_avail) dc_pred_dir = 0;
     if (!a_avail && !c_avail) use_pred = 0;
-    ac_val = s->ac_val[0][0] + s->block_index[n] * 16;
+    ac_val = s->ac_val[0][s->block_index[n]];
     ac_val2 = ac_val;
 
     scale = mquant * 2 + v->halfpq;
@@ -1325,28 +1320,18 @@ static int vc1_decode_p_mb(VC1Context *v)
                     if (i == 1 || i == 3 || s->mb_x)
                         v->c_avail = v->mb_type[0][s->block_index[i] - 1];
 
-                    vc1_decode_intra_block(v, s->block[i], i, val, mquant,
+                    vc1_decode_intra_block(v, v->block[v->cur_blk_idx][i], i, val, mquant,
                                            (i & 4) ? v->codingset2 : v->codingset);
                     if (CONFIG_GRAY && (i > 3) && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
                         continue;
-                    v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
+                    v->vc1dsp.vc1_inv_trans_8x8(v->block[v->cur_blk_idx][i]);
                     if (v->rangeredfrm)
                         for (j = 0; j < 64; j++)
-                            s->block[i][j] <<= 1;
-                    s->idsp.put_signed_pixels_clamped(s->block[i],
-                                                      s->dest[dst_idx] + off,
-                                                      i & 4 ? s->uvlinesize
-                                                            : s->linesize);
-                    if (v->pq >= 9 && v->overlap) {
-                        if (v->c_avail)
-                            v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
-                        if (v->a_avail)
-                            v->vc1dsp.vc1_v_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
-                    }
+                            v->block[v->cur_blk_idx][i][j] <<= 1;
                     block_cbp   |= 0xF << (i << 2);
                     block_intra |= 1 << i;
                 } else if (val) {
-                    pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block,
+                    pat = vc1_decode_p_block(v, v->block[v->cur_blk_idx][i], i, mquant, ttmb, first_block,
                                              s->dest[dst_idx] + off, (i & 4) ? s->uvlinesize : s->linesize,
                                              CONFIG_GRAY && (i & 4) && (s->avctx->flags & AV_CODEC_FLAG_GRAY), &block_tt);
                     block_cbp |= pat << (i << 2);
@@ -1436,28 +1421,18 @@ static int vc1_decode_p_mb(VC1Context *v)
                     if (i == 1 || i == 3 || s->mb_x)
                         v->c_avail = v->mb_type[0][s->block_index[i] - 1];
 
-                    vc1_decode_intra_block(v, s->block[i], i, is_coded[i], mquant,
+                    vc1_decode_intra_block(v, v->block[v->cur_blk_idx][i], i, is_coded[i], mquant,
                                            (i & 4) ? v->codingset2 : v->codingset);
                     if (CONFIG_GRAY && (i > 3) && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
                         continue;
-                    v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
+                    v->vc1dsp.vc1_inv_trans_8x8(v->block[v->cur_blk_idx][i]);
                     if (v->rangeredfrm)
                         for (j = 0; j < 64; j++)
-                            s->block[i][j] <<= 1;
-                    s->idsp.put_signed_pixels_clamped(s->block[i],
-                                                      s->dest[dst_idx] + off,
-                                                      (i & 4) ? s->uvlinesize
-                                                              : s->linesize);
-                    if (v->pq >= 9 && v->overlap) {
-                        if (v->c_avail)
-                            v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
-                        if (v->a_avail)
-                            v->vc1dsp.vc1_v_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
-                    }
+                            v->block[v->cur_blk_idx][i][j] <<= 1;
                     block_cbp   |= 0xF << (i << 2);
                     block_intra |= 1 << i;
                 } else if (is_coded[i]) {
-                    pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
+                    pat = vc1_decode_p_block(v, v->block[v->cur_blk_idx][i], i, mquant, ttmb,
                                              first_block, s->dest[dst_idx] + off,
                                              (i & 4) ? s->uvlinesize : s->linesize,
                                              CONFIG_GRAY && (i & 4) && (s->avctx->flags & AV_CODEC_FLAG_GRAY),
@@ -1484,6 +1459,10 @@ static int vc1_decode_p_mb(VC1Context *v)
         }
     }
 end:
+    if (v->overlap && v->pq >= 9)
+        ff_vc1_p_overlap_filter(v);
+    vc1_put_signed_blocks_clamped(v);
+
     v->cbp[s->mb_x]      = block_cbp;
     v->ttblk[s->mb_x]    = block_tt;
     v->is_intra[s->mb_x] = block_intra;
@@ -1511,7 +1490,7 @@ static int vc1_decode_p_mb_intfr(VC1Context *v)
     int skipped, fourmv = 0, twomv = 0;
     int block_cbp = 0, pat, block_tt = 0;
     int idx_mbmode = 0, mvbp;
-    int stride_y, fieldtx;
+    int fieldtx;
 
     mquant = v->pq; /* Lossy initialization */
 
@@ -1584,22 +1563,16 @@ static int vc1_decode_p_mb_intfr(VC1Context *v)
                 if (i == 1 || i == 3 || s->mb_x)
                     v->c_avail = v->mb_type[0][s->block_index[i] - 1];
 
-                vc1_decode_intra_block(v, s->block[i], i, val, mquant,
+                vc1_decode_intra_block(v, v->block[v->cur_blk_idx][i], i, val, mquant,
                                        (i & 4) ? v->codingset2 : v->codingset);
                 if (CONFIG_GRAY && (i > 3) && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
                     continue;
-                v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
-                if (i < 4) {
-                    stride_y = s->linesize << fieldtx;
+                v->vc1dsp.vc1_inv_trans_8x8(v->block[v->cur_blk_idx][i]);
+                if (i < 4)
                     off = (fieldtx) ? ((i & 1) * 8) + ((i & 2) >> 1) * s->linesize : (i & 1) * 8 + 4 * (i & 2) * s->linesize;
-                } else {
-                    stride_y = s->uvlinesize;
+                else
                     off = 0;
-                }
-                s->idsp.put_signed_pixels_clamped(s->block[i],
-                                                  s->dest[dst_idx] + off,
-                                                  stride_y);
-                //TODO: loop filter
+                block_cbp |= 0xf << (i << 2);
             }
 
         } else { // inter MB
@@ -1670,7 +1643,7 @@ static int vc1_decode_p_mb_intfr(VC1Context *v)
                 else
                     off = (i & 4) ? 0 : ((i & 1) * 8 + ((i > 1) * s->linesize));
                 if (val) {
-                    pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
+                    pat = vc1_decode_p_block(v, v->block[v->cur_blk_idx][i], i, mquant, ttmb,
                                              first_block, s->dest[dst_idx] + off,
                                              (i & 4) ? s->uvlinesize : (s->linesize << fieldtx),
                                              CONFIG_GRAY && (i & 4) && (s->avctx->flags & AV_CODEC_FLAG_GRAY), &block_tt);
@@ -1695,9 +1668,15 @@ static int vc1_decode_p_mb_intfr(VC1Context *v)
         v->blk_mv_type[s->block_index[3]] = 0;
         ff_vc1_pred_mv_intfr(v, 0, 0, 0, 1, v->range_x, v->range_y, v->mb_type[0], 0);
         ff_vc1_mc_1mv(v, 0);
+        v->fieldtx_plane[mb_pos] = 0;
     }
-    if (s->mb_x == s->mb_width - 1)
-        memmove(v->is_intra_base, v->is_intra, sizeof(v->is_intra_base[0])*s->mb_stride);
+    if (v->overlap && v->pq >= 9)
+        ff_vc1_p_overlap_filter(v);
+    vc1_put_signed_blocks_clamped(v);
+
+    v->cbp[s->mb_x]      = block_cbp;
+    v->ttblk[s->mb_x]    = block_tt;
+
     return 0;
 }
 
@@ -1750,17 +1729,13 @@ static int vc1_decode_p_mb_intfi(VC1Context *v)
             if (i == 1 || i == 3 || s->mb_x)
                 v->c_avail = v->mb_type[0][s->block_index[i] - 1];
 
-            vc1_decode_intra_block(v, s->block[i], i, val, mquant,
+            vc1_decode_intra_block(v, v->block[v->cur_blk_idx][i], i, val, mquant,
                                    (i & 4) ? v->codingset2 : v->codingset);
             if (CONFIG_GRAY && (i > 3) && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
                 continue;
-            v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
+            v->vc1dsp.vc1_inv_trans_8x8(v->block[v->cur_blk_idx][i]);
             off  = (i & 4) ? 0 : ((i & 1) * 8 + (i & 2) * 4 * s->linesize);
-            s->idsp.put_signed_pixels_clamped(s->block[i],
-                                              s->dest[dst_idx] + off,
-                                              (i & 4) ? s->uvlinesize
-                                                      : s->linesize);
-            // TODO: loop filter
+            block_cbp |= 0xf << (i << 2);
         }
     } else {
         s->mb_intra = v->is_intra[s->mb_x] = 0;
@@ -1803,7 +1778,7 @@ static int vc1_decode_p_mb_intfi(VC1Context *v)
             val = ((cbp >> (5 - i)) & 1);
             off = (i & 4) ? 0 : (i & 1) * 8 + (i & 2) * 4 * s->linesize;
             if (val) {
-                pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
+                pat = vc1_decode_p_block(v, v->block[v->cur_blk_idx][i], i, mquant, ttmb,
                                          first_block, s->dest[dst_idx] + off,
                                          (i & 4) ? s->uvlinesize : s->linesize,
                                          CONFIG_GRAY && (i & 4) && (s->avctx->flags & AV_CODEC_FLAG_GRAY),
@@ -1815,8 +1790,13 @@ static int vc1_decode_p_mb_intfi(VC1Context *v)
             }
         }
     }
-    if (s->mb_x == s->mb_width - 1)
-        memmove(v->is_intra_base, v->is_intra, sizeof(v->is_intra_base[0]) * s->mb_stride);
+    if (v->overlap && v->pq >= 9)
+        ff_vc1_p_overlap_filter(v);
+    vc1_put_signed_blocks_clamped(v);
+
+    v->cbp[s->mb_x]      = block_cbp;
+    v->ttblk[s->mb_x]    = block_tt;
+
     return 0;
 }
 
@@ -1993,6 +1973,7 @@ static void vc1_decode_b_mb_intfi(VC1Context *v)
     int fwd;
     int dmv_x[2], dmv_y[2], pred_flag[2];
     int bmvtype = BMV_TYPE_BACKWARD;
+    int block_cbp = 0, pat, block_tt = 0;
     int idx_mbmode;
 
     mquant      = v->pq; /* Lossy initialization */
@@ -2039,7 +2020,6 @@ static void vc1_decode_b_mb_intfi(VC1Context *v)
                                               s->dest[dst_idx] + off,
                                               (i & 4) ? s->uvlinesize
                                                       : s->linesize);
-            // TODO: yet to perform loop filter
         }
     } else {
         s->mb_intra = v->is_intra[s->mb_x] = 0;
@@ -2123,16 +2103,19 @@ static void vc1_decode_b_mb_intfi(VC1Context *v)
             val = ((cbp >> (5 - i)) & 1);
             off = (i & 4) ? 0 : (i & 1) * 8 + (i & 2) * 4 * s->linesize;
             if (val) {
-                vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
-                                   first_block, s->dest[dst_idx] + off,
-                                   (i & 4) ? s->uvlinesize : s->linesize,
-                                   CONFIG_GRAY && (i & 4) && (s->avctx->flags & AV_CODEC_FLAG_GRAY), NULL);
+                pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb,
+                                         first_block, s->dest[dst_idx] + off,
+                                         (i & 4) ? s->uvlinesize : s->linesize,
+                                         CONFIG_GRAY && (i & 4) && (s->avctx->flags & AV_CODEC_FLAG_GRAY), &block_tt);
+                block_cbp |= pat << (i << 2);
                 if (!v->ttmbf && ttmb < 8)
                     ttmb = -1;
                 first_block = 0;
             }
         }
     }
+    v->cbp[s->mb_x]      = block_cbp;
+    v->ttblk[s->mb_x]    = block_tt;
 }
 
 /** Decode one B-frame MB (in interlaced frame B picture)
@@ -2473,12 +2456,12 @@ static int vc1_decode_b_mb_intfr(VC1Context *v)
             if (direct || bmvtype == BMV_TYPE_INTERPOLATED) {
                 ff_vc1_interp_mc(v);
             }
+            v->fieldtx_plane[mb_pos] = 0;
         }
     }
-    if (s->mb_x == s->mb_width - 1)
-        memmove(v->is_intra_base, v->is_intra, sizeof(v->is_intra_base[0]) * s->mb_stride);
     v->cbp[s->mb_x]      = block_cbp;
     v->ttblk[s->mb_x]    = block_tt;
+
     return 0;
 }
 
@@ -2708,6 +2691,8 @@ static void vc1_decode_i_blocks_adv(VC1Context *v)
             s->c_dc_scale = s->c_dc_scale_table[mquant];
 
             for (k = 0; k < 6; k++) {
+                v->mb_type[0][s->block_index[k]] = 1;
+
                 val = ((cbp >> (5 - k)) & 1);
 
                 if (k < 4) {
@@ -2728,10 +2713,11 @@ static void vc1_decode_i_blocks_adv(VC1Context *v)
                 v->vc1dsp.vc1_inv_trans_8x8(block[k]);
             }
 
-            ff_vc1_smooth_overlap_filter_iblk(v);
+            if (v->overlap && v->condover != CONDOVER_NONE)
+                ff_vc1_i_overlap_filter(v);
             vc1_put_signed_blocks_clamped(v);
             if (v->s.loop_filter)
-                ff_vc1_loop_filter_iblk_delayed(v, v->pq);
+                ff_vc1_i_loop_filter(v);
 
             if (get_bits_count(&s->gb) > v->bits) {
                 // TODO: may need modification to handle slice coding
@@ -2740,6 +2726,10 @@ static void vc1_decode_i_blocks_adv(VC1Context *v)
                        get_bits_count(&s->gb), v->bits);
                 return;
             }
+            inc_blk_idx(v->topleft_blk_idx);
+            inc_blk_idx(v->top_blk_idx);
+            inc_blk_idx(v->left_blk_idx);
+            inc_blk_idx(v->cur_blk_idx);
         }
         if (!v->s.loop_filter)
             ff_mpeg_draw_horiz_band(s, s->mb_y * 16, 16);
@@ -2748,15 +2738,6 @@ static void vc1_decode_i_blocks_adv(VC1Context *v)
         s->first_slice_line = 0;
     }
 
-    /* raw bottom MB row */
-    s->mb_x = 0;
-    init_block_index(v);
-    for (; s->mb_x < s->mb_width; s->mb_x++) {
-        ff_update_block_index(s);
-        vc1_put_signed_blocks_clamped(v);
-        if (v->s.loop_filter)
-            ff_vc1_loop_filter_iblk_delayed(v, v->pq);
-    }
     if (v->s.loop_filter)
         ff_mpeg_draw_horiz_band(s, (s->end_mb_y - 1) * 16, 16);
     ff_er_add_slice(&s->er, 0, s->start_mb_y << v->field_mode, s->mb_width - 1,
@@ -2793,23 +2774,28 @@ static void vc1_decode_p_blocks(VC1Context *v)
         break;
     }
 
-    apply_loop_filter   = s->loop_filter && !(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY) &&
-                          v->fcm == PROGRESSIVE;
+    apply_loop_filter   = s->loop_filter && !(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY);
     s->first_slice_line = 1;
-    memset(v->cbp_base, 0, sizeof(v->cbp_base[0])*2*s->mb_stride);
+    memset(v->cbp_base, 0, sizeof(v->cbp_base[0]) * 3 * s->mb_stride);
     for (s->mb_y = s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
         s->mb_x = 0;
         init_block_index(v);
         for (; s->mb_x < s->mb_width; s->mb_x++) {
             ff_update_block_index(s);
 
-            if (v->fcm == ILACE_FIELD)
+            if (v->fcm == ILACE_FIELD) {
                 vc1_decode_p_mb_intfi(v);
-            else if (v->fcm == ILACE_FRAME)
+                if (apply_loop_filter)
+                    ff_vc1_p_loop_filter(v);
+            } else if (v->fcm == ILACE_FRAME) {
                 vc1_decode_p_mb_intfr(v);
-            else vc1_decode_p_mb(v);
-            if (s->mb_y != s->start_mb_y && apply_loop_filter)
-                ff_vc1_apply_p_loop_filter(v);
+                if (apply_loop_filter)
+                    ff_vc1_p_intfr_loop_filter(v);
+            } else {
+                vc1_decode_p_mb(v);
+                if (apply_loop_filter)
+                    ff_vc1_p_loop_filter(v);
+            }
             if (get_bits_count(&s->gb) > v->bits || get_bits_count(&s->gb) < 0) {
                 // TODO: may need modification to handle slice coding
                 ff_er_add_slice(&s->er, 0, s->start_mb_y, s->mb_x, s->mb_y, ER_MB_ERROR);
@@ -2817,23 +2803,27 @@ static void vc1_decode_p_blocks(VC1Context *v)
                        get_bits_count(&s->gb), v->bits, s->mb_x, s->mb_y);
                 return;
             }
+            inc_blk_idx(v->topleft_blk_idx);
+            inc_blk_idx(v->top_blk_idx);
+            inc_blk_idx(v->left_blk_idx);
+            inc_blk_idx(v->cur_blk_idx);
         }
-        memmove(v->cbp_base,      v->cbp,      sizeof(v->cbp_base[0])      * s->mb_stride);
-        memmove(v->ttblk_base,    v->ttblk,    sizeof(v->ttblk_base[0])    * s->mb_stride);
-        memmove(v->is_intra_base, v->is_intra, sizeof(v->is_intra_base[0]) * s->mb_stride);
-        memmove(v->luma_mv_base,  v->luma_mv,  sizeof(v->luma_mv_base[0])  * s->mb_stride);
+        memmove(v->cbp_base,
+                v->cbp - s->mb_stride,
+                sizeof(v->cbp_base[0]) * 2 * s->mb_stride);
+        memmove(v->ttblk_base,
+                v->ttblk - s->mb_stride,
+                sizeof(v->ttblk_base[0]) * 2 * s->mb_stride);
+        memmove(v->is_intra_base,
+                v->is_intra - s->mb_stride,
+                sizeof(v->is_intra_base[0]) * 2 * s->mb_stride);
+        memmove(v->luma_mv_base,
+                v->luma_mv - s->mb_stride,
+                sizeof(v->luma_mv_base[0]) * 2 * s->mb_stride);
         if (s->mb_y != s->start_mb_y)
             ff_mpeg_draw_horiz_band(s, (s->mb_y - 1) * 16, 16);
         s->first_slice_line = 0;
     }
-    if (apply_loop_filter) {
-        s->mb_x = 0;
-        init_block_index(v);
-        for (; s->mb_x < s->mb_width; s->mb_x++) {
-            ff_update_block_index(s);
-            ff_vc1_apply_p_loop_filter(v);
-        }
-    }
     if (s->end_mb_y >= s->start_mb_y)
         ff_mpeg_draw_horiz_band(s, (s->end_mb_y - 1) * 16, 16);
     ff_er_add_slice(&s->er, 0, s->start_mb_y << v->field_mode, s->mb_width - 1,
@@ -2876,12 +2866,19 @@ static void vc1_decode_b_blocks(VC1Context *v)
         for (; s->mb_x < s->mb_width; s->mb_x++) {
             ff_update_block_index(s);
 
-            if (v->fcm == ILACE_FIELD)
+            if (v->fcm == ILACE_FIELD) {
                 vc1_decode_b_mb_intfi(v);
-            else if (v->fcm == ILACE_FRAME)
+                if (v->s.loop_filter)
+                    ff_vc1_b_intfi_loop_filter(v);
+            } else if (v->fcm == ILACE_FRAME) {
                 vc1_decode_b_mb_intfr(v);
-            else
+                if (v->s.loop_filter)
+                    ff_vc1_p_intfr_loop_filter(v);
+            } else {
                 vc1_decode_b_mb(v);
+                if (v->s.loop_filter)
+                    ff_vc1_i_loop_filter(v);
+            }
             if (get_bits_count(&s->gb) > v->bits || get_bits_count(&s->gb) < 0) {
                 // TODO: may need modification to handle slice coding
                 ff_er_add_slice(&s->er, 0, s->start_mb_y, s->mb_x, s->mb_y, ER_MB_ERROR);
@@ -2889,9 +2886,16 @@ static void vc1_decode_b_blocks(VC1Context *v)
                        get_bits_count(&s->gb), v->bits, s->mb_x, s->mb_y);
                 return;
             }
-            if (v->s.loop_filter)
-                ff_vc1_loop_filter_iblk(v, v->pq);
         }
+        memmove(v->cbp_base,
+                v->cbp - s->mb_stride,
+                sizeof(v->cbp_base[0]) * 2 * s->mb_stride);
+        memmove(v->ttblk_base,
+                v->ttblk - s->mb_stride,
+                sizeof(v->ttblk_base[0]) * 2 * s->mb_stride);
+        memmove(v->is_intra_base,
+                v->is_intra - s->mb_stride,
+                sizeof(v->is_intra_base[0]) * 2 * s->mb_stride);
         if (!v->s.loop_filter)
             ff_mpeg_draw_horiz_band(s, s->mb_y * 16, 16);
         else if (s->mb_y)