dsputil: x86: Convert h263 loop filter to yasm

[ffmpeg] / libavcodec / vc1dec.c
diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c

index cb15dee982fb183e34fdf7a30f38e10cc50641f4..38b82163a5a5b35afa1f51315addeb57d5dfa7d7 100644 (file)
--- a/libavcodec/vc1dec.c
+++ b/libavcodec/vc1dec.c
@@ -31,6 +31,7 @@
  #include "avcodec.h"
  #include "mpegvideo.h"
  #include "h263.h"
+#include "h264chroma.h"
  #include "vc1.h"
  #include "vc1data.h"
  #include "vc1acdata.h"
@@ -331,6 +332,7 @@ static void vc1_mc_1mv(VC1Context *v, int dir)
  {
      MpegEncContext *s = &v->s;
      DSPContext *dsp   = &v->s.dsp;
+    H264ChromaContext *h264chroma = &v->h264chroma;
      uint8_t *srcY, *srcU, *srcV;
      int dxy, mx, my, uvmx, uvmy, src_x, src_y, uvsrc_x, uvsrc_y;
      int off, off_uv;
@@ -430,19 +432,19 @@ static void vc1_mc_1mv(VC1Context *v, int dir)
      if (v->rangeredfrm || (v->mv_mode == MV_PMODE_INTENSITY_COMP)
          || s->h_edge_pos < 22 || v_edge_pos < 22
          || (unsigned)(src_x - s->mspel) > s->h_edge_pos - (mx&3) - 16 - s->mspel * 3
-        || (unsigned)(src_y - s->mspel) > v_edge_pos    - (my&3) - 16 - s->mspel * 3) {
+        || (unsigned)(src_y - 1)        > v_edge_pos    - (my&3) - 16 - 3) {
          uint8_t *uvbuf = s->edge_emu_buffer + 19 * s->linesize;
  
          srcY -= s->mspel * (1 + s->linesize);
-        s->dsp.emulated_edge_mc(s->edge_emu_buffer, srcY, s->linesize,
-                                17 + s->mspel * 2, 17 + s->mspel * 2,
-                                src_x - s->mspel, src_y - s->mspel,
-                                s->h_edge_pos, v_edge_pos);
+        s->vdsp.emulated_edge_mc(s->edge_emu_buffer, srcY, s->linesize,
+                                 17 + s->mspel * 2, 17 + s->mspel * 2,
+                                 src_x - s->mspel, src_y - s->mspel,
+                                 s->h_edge_pos, v_edge_pos);
          srcY = s->edge_emu_buffer;
-        s->dsp.emulated_edge_mc(uvbuf     , srcU, s->uvlinesize, 8 + 1, 8 + 1,
-                                uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, v_edge_pos >> 1);
-        s->dsp.emulated_edge_mc(uvbuf + 16, srcV, s->uvlinesize, 8 + 1, 8 + 1,
-                                uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, v_edge_pos >> 1);
+        s->vdsp.emulated_edge_mc(uvbuf     , srcU, s->uvlinesize, 8 + 1, 8 + 1,
+                                 uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, v_edge_pos >> 1);
+        s->vdsp.emulated_edge_mc(uvbuf + 16, srcV, s->uvlinesize, 8 + 1, 8 + 1,
+                                 uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, v_edge_pos >> 1);
          srcU = uvbuf;
          srcV = uvbuf + 16;
          /* if we deal with range reduction we need to scale source blocks */
@@ -519,8 +521,8 @@ static void vc1_mc_1mv(VC1Context *v, int dir)
      uvmx = (uvmx & 3) << 1;
      uvmy = (uvmy & 3) << 1;
      if (!v->rnd) {
-        dsp->put_h264_chroma_pixels_tab[0](s->dest[1] + off_uv, srcU, s->uvlinesize, 8, uvmx, uvmy);
-        dsp->put_h264_chroma_pixels_tab[0](s->dest[2] + off_uv, srcV, s->uvlinesize, 8, uvmx, uvmy);
+        h264chroma->put_h264_chroma_pixels_tab[0](s->dest[1] + off_uv, srcU, s->uvlinesize, 8, uvmx, uvmy);
+        h264chroma->put_h264_chroma_pixels_tab[0](s->dest[2] + off_uv, srcV, s->uvlinesize, 8, uvmx, uvmy);
      } else {
          v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1] + off_uv, srcU, s->uvlinesize, 8, uvmx, uvmy);
          v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2] + off_uv, srcV, s->uvlinesize, 8, uvmx, uvmy);
@@ -667,10 +669,10 @@ static void vc1_mc_4mv_luma(VC1Context *v, int n, int dir)
          || (unsigned)(src_y - (s->mspel << fieldmv)) > v_edge_pos - (my & 3) - ((8 + s->mspel * 2) << fieldmv)) {
          srcY -= s->mspel * (1 + (s->linesize << fieldmv));
          /* check emulate edge stride and offset */
-        s->dsp.emulated_edge_mc(s->edge_emu_buffer, srcY, s->linesize,
-                                9 + s->mspel * 2, (9 + s->mspel * 2) << fieldmv,
-                                src_x - s->mspel, src_y - (s->mspel << fieldmv),
-                                s->h_edge_pos, v_edge_pos);
+        s->vdsp.emulated_edge_mc(s->edge_emu_buffer, srcY, s->linesize,
+                                 9 + s->mspel * 2, (9 + s->mspel * 2) << fieldmv,
+                                 src_x - s->mspel, src_y - (s->mspel << fieldmv),
+                                 s->h_edge_pos, v_edge_pos);
          srcY = s->edge_emu_buffer;
          /* if we deal with range reduction we need to scale source blocks */
          if (v->rangeredfrm) {
@@ -769,7 +771,7 @@ static av_always_inline int get_chroma_mv(int *mvx, int *mvy, int *a, int flag,
  static void vc1_mc_4mv_chroma(VC1Context *v, int dir)
  {
      MpegEncContext *s = &v->s;
-    DSPContext *dsp   = &v->s.dsp;
+    H264ChromaContext *h264chroma = &v->h264chroma;
      uint8_t *srcU, *srcV;
      int uvmx, uvmy, uvsrc_x, uvsrc_y;
      int k, tx = 0, ty = 0;
@@ -794,6 +796,7 @@ static void vc1_mc_4mv_chroma(VC1Context *v, int dir)
      /* calculate chroma MV vector from four luma MVs */
      if (!v->field_mode || (v->field_mode && !v->numref)) {
          valid_count = get_chroma_mv(mvx, mvy, intra, 0, &tx, &ty);
+        chroma_ref_type = v->reffield;
          if (!valid_count) {
              s->current_picture.f.motion_val[1][s->block_index[0] + v->blocks_off][0] = 0;
              s->current_picture.f.motion_val[1][s->block_index[0] + v->blocks_off][1] = 0;
@@ -867,12 +870,12 @@ static void vc1_mc_4mv_chroma(VC1Context *v, int dir)
          || s->h_edge_pos < 18 || v_edge_pos < 18
          || (unsigned)uvsrc_x > (s->h_edge_pos >> 1) - 9
          || (unsigned)uvsrc_y > (v_edge_pos    >> 1) - 9) {
-        s->dsp.emulated_edge_mc(s->edge_emu_buffer     , srcU, s->uvlinesize,
-                                8 + 1, 8 + 1, uvsrc_x, uvsrc_y,
-                                s->h_edge_pos >> 1, v_edge_pos >> 1);
-        s->dsp.emulated_edge_mc(s->edge_emu_buffer + 16, srcV, s->uvlinesize,
-                                8 + 1, 8 + 1, uvsrc_x, uvsrc_y,
-                                s->h_edge_pos >> 1, v_edge_pos >> 1);
+        s->vdsp.emulated_edge_mc(s->edge_emu_buffer     , srcU, s->uvlinesize,
+                                 8 + 1, 8 + 1, uvsrc_x, uvsrc_y,
+                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
+        s->vdsp.emulated_edge_mc(s->edge_emu_buffer + 16, srcV, s->uvlinesize,
+                                 8 + 1, 8 + 1, uvsrc_x, uvsrc_y,
+                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
          srcU = s->edge_emu_buffer;
          srcV = s->edge_emu_buffer + 16;
  
@@ -914,8 +917,8 @@ static void vc1_mc_4mv_chroma(VC1Context *v, int dir)
      uvmx = (uvmx & 3) << 1;
      uvmy = (uvmy & 3) << 1;
      if (!v->rnd) {
-        dsp->put_h264_chroma_pixels_tab[0](s->dest[1] + off, srcU, s->uvlinesize, 8, uvmx, uvmy);
-        dsp->put_h264_chroma_pixels_tab[0](s->dest[2] + off, srcV, s->uvlinesize, 8, uvmx, uvmy);
+        h264chroma->put_h264_chroma_pixels_tab[0](s->dest[1] + off, srcU, s->uvlinesize, 8, uvmx, uvmy);
+        h264chroma->put_h264_chroma_pixels_tab[0](s->dest[2] + off, srcV, s->uvlinesize, 8, uvmx, uvmy);
      } else {
          v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1] + off, srcU, s->uvlinesize, 8, uvmx, uvmy);
          v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2] + off, srcV, s->uvlinesize, 8, uvmx, uvmy);
@@ -927,7 +930,7 @@ static void vc1_mc_4mv_chroma(VC1Context *v, int dir)
  static void vc1_mc_4mv_chroma4(VC1Context *v)
  {
      MpegEncContext *s = &v->s;
-    DSPContext *dsp = &v->s.dsp;
+    H264ChromaContext *h264chroma = &v->h264chroma;
      uint8_t *srcU, *srcV;
      int uvsrc_x, uvsrc_y;
      int uvmx_field[4], uvmy_field[4];
@@ -972,12 +975,12 @@ static void vc1_mc_4mv_chroma4(VC1Context *v)
              || s->h_edge_pos < 10 || v_edge_pos < (5 << fieldmv)
              || (unsigned)uvsrc_x > (s->h_edge_pos >> 1) - 5
              || (unsigned)uvsrc_y > v_edge_pos - (5 << fieldmv)) {
-            s->dsp.emulated_edge_mc(s->edge_emu_buffer, srcU, s->uvlinesize,
-                                    5, (5 << fieldmv), uvsrc_x, uvsrc_y,
-                                    s->h_edge_pos >> 1, v_edge_pos);
-            s->dsp.emulated_edge_mc(s->edge_emu_buffer + 16, srcV, s->uvlinesize,
-                                    5, (5 << fieldmv), uvsrc_x, uvsrc_y,
-                                    s->h_edge_pos >> 1, v_edge_pos);
+            s->vdsp.emulated_edge_mc(s->edge_emu_buffer, srcU, s->uvlinesize,
+                                     5, (5 << fieldmv), uvsrc_x, uvsrc_y,
+                                     s->h_edge_pos >> 1, v_edge_pos);
+            s->vdsp.emulated_edge_mc(s->edge_emu_buffer + 16, srcV, s->uvlinesize,
+                                     5, (5 << fieldmv), uvsrc_x, uvsrc_y,
+                                     s->h_edge_pos >> 1, v_edge_pos);
              srcU = s->edge_emu_buffer;
              srcV = s->edge_emu_buffer + 16;
  
@@ -999,8 +1002,8 @@ static void vc1_mc_4mv_chroma4(VC1Context *v)
              }
          }
          if (!v->rnd) {
-            dsp->put_h264_chroma_pixels_tab[1](s->dest[1] + off, srcU, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
-            dsp->put_h264_chroma_pixels_tab[1](s->dest[2] + off, srcV, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
+            h264chroma->put_h264_chroma_pixels_tab[1](s->dest[1] + off, srcU, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
+            h264chroma->put_h264_chroma_pixels_tab[1](s->dest[2] + off, srcV, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
          } else {
              v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[1](s->dest[1] + off, srcU, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
              v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[1](s->dest[2] + off, srcV, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
@@ -1132,8 +1135,12 @@ static av_always_inline void get_mvdata_interlaced(VC1Context *v, int *dmv_x,
          *dmv_x = get_bits(gb, v->k_x);
          *dmv_y = get_bits(gb, v->k_y);
          if (v->numref) {
-            *pred_flag = *dmv_y & 1;
-            *dmv_y     = (*dmv_y + *pred_flag) >> 1;
+            if (pred_flag) {
+                *pred_flag = *dmv_y & 1;
+                *dmv_y     = (*dmv_y + *pred_flag) >> 1;
+            } else {
+                *dmv_y     = (*dmv_y + (*dmv_y & 1)) >> 1;
+            }
          }
      }
      else {
@@ -1159,7 +1166,7 @@ static av_always_inline void get_mvdata_interlaced(VC1Context *v, int *dmv_x,
              *dmv_y = (sign ^ ((val >> 1) + offs_tab[index1 >> v->numref])) - sign;
          } else
              *dmv_y = 0;
-        if (v->numref)
+        if (v->numref && pred_flag)
              *pred_flag = index1 & 1;
      }
  }
@@ -1349,7 +1356,7 @@ static inline void vc1_pred_mv(VC1Context *v, int n, int dmv_x, int dmv_y,
      int px, py;
      int sum;
      int mixedmv_pic, num_samefield = 0, num_oppfield = 0;
-    int opposit, a_f, b_f, c_f;
+    int opposite, a_f, b_f, c_f;
      int16_t field_predA[2];
      int16_t field_predB[2];
      int16_t field_predC[2];
@@ -1457,13 +1464,19 @@ static inline void vc1_pred_mv(VC1Context *v, int n, int dmv_x, int dmv_y,
      }
  
      if (v->field_mode) {
-        if (num_samefield <= num_oppfield)
-            opposit = 1 - pred_flag;
-        else
-            opposit = pred_flag;
+        if (!v->numref)
+            // REFFIELD determines if the last field or the second-last field is
+            // to be used as reference
+            opposite = 1 - v->reffield;
+        else {
+            if (num_samefield <= num_oppfield)
+                opposite = 1 - pred_flag;
+            else
+                opposite = pred_flag;
+        }
      } else
-        opposit = 0;
-    if (opposit) {
+        opposite = 0;
+    if (opposite) {
          if (a_valid && !a_f) {
              field_predA[0] = scaleforopp(v, field_predA[0], 0, dir);
              field_predA[1] = scaleforopp(v, field_predA[1], 1, dir);
@@ -1566,10 +1579,6 @@ static inline void vc1_pred_mv(VC1Context *v, int n, int dmv_x, int dmv_y,
          }
      }
  
-    if (v->field_mode && !s->quarter_sample) {
-        r_x <<= 1;
-        r_y <<= 1;
-    }
      if (v->field_mode && v->numref)
          r_y >>= 1;
      if (v->field_mode && v->cur_field_type && v->ref_field_type[dir] == 0)
@@ -1821,6 +1830,7 @@ static void vc1_interp_mc(VC1Context *v)
  {
      MpegEncContext *s = &v->s;
      DSPContext *dsp = &v->s.dsp;
+    H264ChromaContext *h264chroma = &v->h264chroma;
      uint8_t *srcY, *srcU, *srcV;
      int dxy, mx, my, uvmx, uvmy, src_x, src_y, uvsrc_x, uvsrc_y;
      int off, off_uv;
@@ -1885,15 +1895,15 @@ static void vc1_interp_mc(VC1Context *v)
          uint8_t *uvbuf = s->edge_emu_buffer + 19 * s->linesize;
  
          srcY -= s->mspel * (1 + s->linesize);
-        s->dsp.emulated_edge_mc(s->edge_emu_buffer, srcY, s->linesize,
-                                17 + s->mspel * 2, 17 + s->mspel * 2,
-                                src_x - s->mspel, src_y - s->mspel,
-                                s->h_edge_pos, v_edge_pos);
+        s->vdsp.emulated_edge_mc(s->edge_emu_buffer, srcY, s->linesize,
+                                 17 + s->mspel * 2, 17 + s->mspel * 2,
+                                 src_x - s->mspel, src_y - s->mspel,
+                                 s->h_edge_pos, v_edge_pos);
          srcY = s->edge_emu_buffer;
-        s->dsp.emulated_edge_mc(uvbuf     , srcU, s->uvlinesize, 8 + 1, 8 + 1,
-                                uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, v_edge_pos >> 1);
-        s->dsp.emulated_edge_mc(uvbuf + 16, srcV, s->uvlinesize, 8 + 1, 8 + 1,
-                                uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, v_edge_pos >> 1);
+        s->vdsp.emulated_edge_mc(uvbuf     , srcU, s->uvlinesize, 8 + 1, 8 + 1,
+                                 uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, v_edge_pos >> 1);
+        s->vdsp.emulated_edge_mc(uvbuf + 16, srcV, s->uvlinesize, 8 + 1, 8 + 1,
+                                 uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, v_edge_pos >> 1);
          srcU = uvbuf;
          srcV = uvbuf + 16;
          /* if we deal with range reduction we need to scale source blocks */
@@ -1942,7 +1952,7 @@ static void vc1_interp_mc(VC1Context *v)
          if (!v->rnd)
              dsp->avg_pixels_tab[0][dxy](s->dest[0] + off, srcY, s->linesize, 16);
          else
-            dsp->avg_no_rnd_pixels_tab[0][dxy](s->dest[0] + off, srcY, s->linesize, 16);
+            dsp->avg_no_rnd_pixels_tab[dxy](s->dest[0] + off, srcY, s->linesize, 16);
      }
  
      if (s->flags & CODEC_FLAG_GRAY) return;
@@ -1950,8 +1960,8 @@ static void vc1_interp_mc(VC1Context *v)
      uvmx = (uvmx & 3) << 1;
      uvmy = (uvmy & 3) << 1;
      if (!v->rnd) {
-        dsp->avg_h264_chroma_pixels_tab[0](s->dest[1] + off_uv, srcU, s->uvlinesize, 8, uvmx, uvmy);
-        dsp->avg_h264_chroma_pixels_tab[0](s->dest[2] + off_uv, srcV, s->uvlinesize, 8, uvmx, uvmy);
+        h264chroma->avg_h264_chroma_pixels_tab[0](s->dest[1] + off_uv, srcU, s->uvlinesize, 8, uvmx, uvmy);
+        h264chroma->avg_h264_chroma_pixels_tab[0](s->dest[2] + off_uv, srcV, s->uvlinesize, 8, uvmx, uvmy);
      } else {
          v->vc1dsp.avg_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1] + off_uv, srcU, s->uvlinesize, 8, uvmx, uvmy);
          v->vc1dsp.avg_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2] + off_uv, srcV, s->uvlinesize, 8, uvmx, uvmy);
@@ -2531,7 +2541,7 @@ static void vc1_decode_ac_coeff(VC1Context *v, int *last, int *skip,
   * @param coded are AC coeffs present or not
   * @param codingset set of VLC to decode data
   */
-static int vc1_decode_i_block(VC1Context *v, DCTELEM block[64], int n,
+static int vc1_decode_i_block(VC1Context *v, int16_t block[64], int n,
                                int coded, int codingset)
  {
      GetBitContext *gb = &v->s.gb;
@@ -2694,7 +2704,7 @@ not_coded:
   * @param codingset set of VLC to decode data
   * @param mquant quantizer value for this macroblock
   */
-static int vc1_decode_i_block_adv(VC1Context *v, DCTELEM block[64], int n,
+static int vc1_decode_i_block_adv(VC1Context *v, int16_t block[64], int n,
                                    int coded, int codingset, int mquant)
  {
      GetBitContext *gb = &v->s.gb;
@@ -2906,7 +2916,7 @@ static int vc1_decode_i_block_adv(VC1Context *v, DCTELEM block[64], int n,
   * @param mquant block quantizer
   * @param codingset set of VLC to decode data
   */
-static int vc1_decode_intra_block(VC1Context *v, DCTELEM block[64], int n,
+static int vc1_decode_intra_block(VC1Context *v, int16_t block[64], int n,
                                    int coded, int mquant, int codingset)
  {
      GetBitContext *gb = &v->s.gb;
@@ -3116,7 +3126,7 @@ static int vc1_decode_intra_block(VC1Context *v, DCTELEM block[64], int n,
  
  /** Decode P block
   */
-static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n,
+static int vc1_decode_p_block(VC1Context *v, int16_t block[64], int n,
                                int mquant, int ttmb, int first_block,
                                uint8_t *dst, int linesize, int skip_block,
                                int *ttmb_out)
@@ -4348,10 +4358,10 @@ static void vc1_decode_i_blocks(VC1Context *v)
      s->mb_x = s->mb_y = 0;
      s->mb_intra         = 1;
      s->first_slice_line = 1;
-    for (s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) {
+    for (s->mb_y = 0; s->mb_y < s->end_mb_y; s->mb_y++) {
          s->mb_x = 0;
          ff_init_block_index(s);
-        for (; s->mb_x < s->mb_width; s->mb_x++) {
+        for (; s->mb_x < v->end_mb_x; s->mb_x++) {
              uint8_t *dst[6];
              ff_update_block_index(s);
              dst[0] = s->dest[0];
@@ -4438,7 +4448,10 @@ static void vc1_decode_i_blocks(VC1Context *v)
          s->first_slice_line = 0;
      }
      if (v->s.loop_filter)
-        ff_draw_horiz_band(s, (s->mb_height - 1) * 16, 16);
+        ff_draw_horiz_band(s, (s->end_mb_y - 1) * 16, 16);
+
+    /* This is intentionally mb_height and not end_mb_y - unlike in advanced
+     * profile, these only differ when decoding MSS2 rectangles. */
      ff_er_add_slice(s, 0, 0, s->mb_width - 1, s->mb_height - 1, ER_MB_END);
  }
  
@@ -4495,7 +4508,7 @@ static void vc1_decode_i_blocks_adv(VC1Context *v)
          s->mb_x = 0;
          ff_init_block_index(s);
          for (;s->mb_x < s->mb_width; s->mb_x++) {
-            DCTELEM (*block)[64] = v->block[v->cur_blk_idx];
+            int16_t (*block)[64] = v->block[v->cur_blk_idx];
              ff_update_block_index(s);
              s->dsp.clear_blocks(block[0]);
              mb_pos = s->mb_x + s->mb_y * s->mb_stride;
@@ -4732,7 +4745,7 @@ static void vc1_decode_skip_blocks(VC1Context *v)
      s->pict_type = AV_PICTURE_TYPE_P;
  }
  
-static void vc1_decode_blocks(VC1Context *v)
+void ff_vc1_decode_blocks(VC1Context *v)
  {
  
      v->s.esc3_level_length = 0;
@@ -4890,7 +4903,7 @@ static void vc1_parse_sprites(VC1Context *v, GetBitContext* gb, SpriteData* sd)
          av_log(avctx, AV_LOG_DEBUG, "Effect flag set\n");
  
      if (get_bits_count(gb) >= gb->size_in_bits +
-       (avctx->codec_id == CODEC_ID_WMV3IMAGE ? 64 : 0))
+       (avctx->codec_id == AV_CODEC_ID_WMV3IMAGE ? 64 : 0))
          av_log(avctx, AV_LOG_ERROR, "Buffer overrun\n");
      if (get_bits_count(gb) < gb->size_in_bits - 8)
          av_log(avctx, AV_LOG_WARNING, "Buffer not fully read\n");
@@ -5016,7 +5029,7 @@ static int vc1_decode_sprites(VC1Context *v, GetBitContext* gb)
  
      v->sprite_output_frame.buffer_hints = FF_BUFFER_HINTS_VALID;
      v->sprite_output_frame.reference = 0;
-    if (avctx->get_buffer(avctx, &v->sprite_output_frame) < 0) {
+    if (ff_get_buffer(avctx, &v->sprite_output_frame) < 0) {
          av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
          return -1;
      }
@@ -5046,7 +5059,7 @@ static void vc1_sprite_flush(AVCodecContext *avctx)
  
  #endif
  
-static av_cold int vc1_decode_init_alloc_tables(VC1Context *v)
+av_cold int ff_vc1_decode_init_alloc_tables(VC1Context *v)
  {
      MpegEncContext *s = &v->s;
      int i;
@@ -5099,7 +5112,7 @@ static av_cold int vc1_decode_init_alloc_tables(VC1Context *v)
  
      ff_intrax8_common_init(&v->x8,s);
  
-    if (s->avctx->codec_id == CODEC_ID_WMV3IMAGE || s->avctx->codec_id == CODEC_ID_VC1IMAGE) {
+    if (s->avctx->codec_id == AV_CODEC_ID_WMV3IMAGE || s->avctx->codec_id == AV_CODEC_ID_VC1IMAGE) {
          for (i = 0; i < 4; i++)
              if (!(v->sr_rows[i >> 1][i & 1] = av_malloc(v->output_width))) return -1;
      }
@@ -5112,6 +5125,21 @@ static av_cold int vc1_decode_init_alloc_tables(VC1Context *v)
      return 0;
  }
  
+av_cold void ff_vc1_init_transposed_scantables(VC1Context *v) /* fills v->zz_8x8[0..3] and v->zzi_8x8 with transposed copies of the source scan tables, then sets the block shifts */
+{
+    int i;
+    for (i = 0; i < 64; i++) { /* each table written below receives 64 entries */
+#define transpose(x) ((x >> 3) | ((x & 7) << 3)) /* for x in 0..63: swaps bits 0-2 with bits 3-5 (row <-> column of an 8x8 index); x is unparenthesized, so pass simple expressions only; assumes table entries are < 64 - TODO confirm */
+        v->zz_8x8[0][i] = transpose(ff_wmv1_scantable[0][i]);
+        v->zz_8x8[1][i] = transpose(ff_wmv1_scantable[1][i]);
+        v->zz_8x8[2][i] = transpose(ff_wmv1_scantable[2][i]);
+        v->zz_8x8[3][i] = transpose(ff_wmv1_scantable[3][i]);
+        v->zzi_8x8[i]   = transpose(ff_vc1_adv_interlaced_8x8_zz[i]); /* same transposition applied to the interlaced 8x8 table */
+    }
+    v->left_blk_sh = 0; /* the non-transposed (memcpy) path in vc1_decode_init uses 3 here and 0 for top_blk_sh */
+    v->top_blk_sh  = 3; /* NOTE(review): presumably shift amounts for addressing left/top neighbour coefficients - confirm against users of these fields */
+}
+
  /** Initialize a VC1/WMV3 decoder
   * @todo TODO: Handle VC-1 IDUs (Transport level?)
   * @todo TODO: Decypher remaining bits in extra_data
@@ -5121,7 +5149,6 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
      VC1Context *v = avctx->priv_data;
      MpegEncContext *s = &v->s;
      GetBitContext gb;
-    int i;
  
      /* save the container output size for WMImage */
      v->output_width  = avctx->width;
@@ -5132,21 +5159,18 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
      if (!(avctx->flags & CODEC_FLAG_GRAY))
          avctx->pix_fmt = avctx->get_format(avctx, avctx->codec->pix_fmts);
      else
-        avctx->pix_fmt = PIX_FMT_GRAY8;
+        avctx->pix_fmt = AV_PIX_FMT_GRAY8;
      avctx->hwaccel = ff_find_hwaccel(avctx->codec->id, avctx->pix_fmt);
      v->s.avctx = avctx;
      avctx->flags |= CODEC_FLAG_EMU_EDGE;
      v->s.flags   |= CODEC_FLAG_EMU_EDGE;
  
-    if (avctx->idct_algo == FF_IDCT_AUTO) {
-        avctx->idct_algo = FF_IDCT_WMV2;
-    }
-
      if (ff_vc1_init_common(v) < 0)
          return -1;
+    ff_h264chroma_init(&v->h264chroma, 8);
      ff_vc1dsp_init(&v->vc1dsp);
  
-    if (avctx->codec_id == CODEC_ID_WMV3 || avctx->codec_id == CODEC_ID_WMV3IMAGE) {
+    if (avctx->codec_id == AV_CODEC_ID_WMV3 || avctx->codec_id == AV_CODEC_ID_WMV3IMAGE) {
          int count = 0;
  
          // looks like WMV3 has a sequence header stored in the extradata
@@ -5211,7 +5235,7 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
              av_log(avctx, AV_LOG_ERROR, "Incomplete extradata\n");
              return -1;
          }
-        v->res_sprite = (avctx->codec_tag == MKTAG('W','V','P','2'));
+        v->res_sprite = (avctx->codec_id == AV_CODEC_ID_VC1IMAGE);
      }
  
      avctx->profile = v->profile;
@@ -5224,23 +5248,14 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
      s->mb_height = (avctx->coded_height + 15) >> 4;
  
      if (v->profile == PROFILE_ADVANCED || v->res_fasttx) {
-        for (i = 0; i < 64; i++) {
-#define transpose(x) ((x >> 3) | ((x & 7) << 3))
-            v->zz_8x8[0][i] = transpose(ff_wmv1_scantable[0][i]);
-            v->zz_8x8[1][i] = transpose(ff_wmv1_scantable[1][i]);
-            v->zz_8x8[2][i] = transpose(ff_wmv1_scantable[2][i]);
-            v->zz_8x8[3][i] = transpose(ff_wmv1_scantable[3][i]);
-            v->zzi_8x8[i] = transpose(ff_vc1_adv_interlaced_8x8_zz[i]);
-        }
-        v->left_blk_sh = 0;
-        v->top_blk_sh  = 3;
+        ff_vc1_init_transposed_scantables(v);
      } else {
          memcpy(v->zz_8x8, ff_wmv1_scantable, 4*64);
          v->left_blk_sh = 3;
          v->top_blk_sh  = 0;
      }
  
-    if (avctx->codec_id == CODEC_ID_WMV3IMAGE || avctx->codec_id == CODEC_ID_VC1IMAGE) {
+    if (avctx->codec_id == AV_CODEC_ID_WMV3IMAGE || avctx->codec_id == AV_CODEC_ID_VC1IMAGE) {
          v->sprite_width  = avctx->coded_width;
          v->sprite_height = avctx->coded_height;
  
@@ -5259,12 +5274,12 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
  /** Close a VC1/WMV3 decoder
   * @warning Initial try at using MpegEncContext stuff
   */
-static av_cold int vc1_decode_end(AVCodecContext *avctx)
+av_cold int ff_vc1_decode_end(AVCodecContext *avctx)
  {
      VC1Context *v = avctx->priv_data;
      int i;
  
-    if ((avctx->codec_id == CODEC_ID_WMV3IMAGE || avctx->codec_id == CODEC_ID_VC1IMAGE)
+    if ((avctx->codec_id == AV_CODEC_ID_WMV3IMAGE || avctx->codec_id == AV_CODEC_ID_VC1IMAGE)
          && v->sprite_output_frame.data[0])
          avctx->release_buffer(avctx, &v->sprite_output_frame);
      for (i = 0; i < 4; i++)
@@ -5297,7 +5312,7 @@ static av_cold int vc1_decode_end(AVCodecContext *avctx)
   * @todo TODO: Handle VC-1 IDUs (Transport level?)
   */
  static int vc1_decode_frame(AVCodecContext *avctx, void *data,
-                            int *data_size, AVPacket *avpkt)
+                            int *got_frame, AVPacket *avpkt)
  {
      const uint8_t *buf = avpkt->data;
      int buf_size = avpkt->size, n_slices = 0, i;
@@ -5320,7 +5335,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
              *pict = s->next_picture_ptr->f;
              s->next_picture_ptr = NULL;
  
-            *data_size = sizeof(AVFrame);
+            *got_frame = 1;
          }
  
          return 0;
@@ -5328,13 +5343,13 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
  
      if (s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) {
          if (v->profile < PROFILE_ADVANCED)
-            avctx->pix_fmt = PIX_FMT_VDPAU_WMV3;
+            avctx->pix_fmt = AV_PIX_FMT_VDPAU_WMV3;
          else
-            avctx->pix_fmt = PIX_FMT_VDPAU_VC1;
+            avctx->pix_fmt = AV_PIX_FMT_VDPAU_VC1;
      }
  
      //for advanced profile we may need to parse and unescape data
-    if (avctx->codec_id == CODEC_ID_VC1 || avctx->codec_id == CODEC_ID_VC1IMAGE) {
+    if (avctx->codec_id == AV_CODEC_ID_VC1 || avctx->codec_id == AV_CODEC_ID_VC1IMAGE) {
          int buf_size2 = 0;
          buf2 = av_mallocz(buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
  
@@ -5356,9 +5371,10 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
                      break;
                  case VC1_CODE_FIELD: {
                      int buf_size3;
-                    slices = av_realloc(slices, sizeof(*slices) * (n_slices+1));
-                    if (!slices)
+                    tmp = av_realloc(slices, sizeof(*slices) * (n_slices+1));
+                    if (!tmp)
                          goto err;
+                    slices = tmp;
                      slices[n_slices].buf = av_mallocz(buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
                      if (!slices[n_slices].buf)
                          goto err;
@@ -5380,9 +5396,10 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
                      break;
                  case VC1_CODE_SLICE: {
                      int buf_size3;
-                    slices = av_realloc(slices, sizeof(*slices) * (n_slices+1));
-                    if (!slices)
+                    tmp = av_realloc(slices, sizeof(*slices) * (n_slices+1));
+                    if (!tmp)
                          goto err;
+                    slices = tmp;
                      slices[n_slices].buf = av_mallocz(buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
                      if (!slices[n_slices].buf)
                          goto err;
@@ -5430,11 +5447,11 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
      if (v->res_sprite) {
          v->new_sprite  = !get_bits1(&s->gb);
          v->two_sprites =  get_bits1(&s->gb);
-        /* res_sprite means a Windows Media Image stream, CODEC_ID_*IMAGE means
+        /* res_sprite means a Windows Media Image stream, AV_CODEC_ID_*IMAGE means
             we're using the sprite compositor. These are intentionally kept separate
             so you can get the raw sprites by using the wmv3 decoder for WMVP or
             the vc1 one for WVP2 */
-        if (avctx->codec_id == CODEC_ID_WMV3IMAGE || avctx->codec_id == CODEC_ID_VC1IMAGE) {
+        if (avctx->codec_id == AV_CODEC_ID_WMV3IMAGE || avctx->codec_id == AV_CODEC_ID_VC1IMAGE) {
              if (v->new_sprite) {
                  // switch AVCodecContext parameters to those of the sprites
                  avctx->width  = avctx->coded_width  = v->sprite_width;
@@ -5448,12 +5465,12 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
      if (s->context_initialized &&
          (s->width  != avctx->coded_width ||
           s->height != avctx->coded_height)) {
-        vc1_decode_end(avctx);
+        ff_vc1_decode_end(avctx);
      }
  
      if (!s->context_initialized) {
-        if (ff_msmpeg4_decode_init(avctx) < 0 || vc1_decode_init_alloc_tables(v) < 0)
-            return -1;
+        if (ff_msmpeg4_decode_init(avctx) < 0 || ff_vc1_decode_init_alloc_tables(v) < 0)
+            goto err;
  
          s->low_delay = !avctx->has_b_frames || v->res_sprite;
  
@@ -5484,7 +5501,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
          }
      }
  
-    if ((avctx->codec_id == CODEC_ID_WMV3IMAGE || avctx->codec_id == CODEC_ID_VC1IMAGE)
+    if ((avctx->codec_id == AV_CODEC_ID_WMV3IMAGE || avctx->codec_id == AV_CODEC_ID_VC1IMAGE)
          && s->pict_type != AV_PICTURE_TYPE_I) {
          av_log(v->s.avctx, AV_LOG_ERROR, "Sprite decoder: expected I-frame\n");
          goto err;
@@ -5507,7 +5524,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
      s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I;
  
      /* skip B-frames if we don't have reference frames */
-    if (s->last_picture_ptr == NULL && (s->pict_type == AV_PICTURE_TYPE_B || s->dropable)) {
+    if (s->last_picture_ptr == NULL && (s->pict_type == AV_PICTURE_TYPE_B || s->droppable)) {
          goto err;
      }
      if ((avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type == AV_PICTURE_TYPE_B) ||
@@ -5544,6 +5561,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
          ff_er_frame_start(s);
  
          v->bits = buf_size * 8;
+        v->end_mb_x = s->mb_width;
          if (v->field_mode) {
              uint8_t *tmp[2];
              s->current_picture.f.linesize[0] <<= 1;
@@ -5597,7 +5615,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
                  s->end_mb_y = (i == n_slices     ) ? mb_height : FFMIN(mb_height, slices[i].mby_start % mb_height);
              else
                  s->end_mb_y = (i <= n_slices1 + 1) ? mb_height : FFMIN(mb_height, slices[i].mby_start % mb_height);
-            vc1_decode_blocks(v);
+            ff_vc1_decode_blocks(v);
              if (i != n_slices)
                  s->gb = slices[i].gb;
          }
@@ -5613,7 +5631,8 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
              s->linesize                      >>= 1;
              s->uvlinesize                    >>= 1;
          }
-//av_log(s->avctx, AV_LOG_INFO, "Consumed %i/%i bits\n", get_bits_count(&s->gb), s->gb.size_in_bits);
+        av_dlog(s->avctx, "Consumed %i/%i bits\n",
+                get_bits_count(&s->gb), s->gb.size_in_bits);
  //  if (get_bits_count(&s->gb) > buf_size * 8)
  //      return -1;
          ff_er_frame_end(s);
@@ -5621,7 +5640,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
  
      ff_MPV_frame_end(s);
  
-    if (avctx->codec_id == CODEC_ID_WMV3IMAGE || avctx->codec_id == CODEC_ID_VC1IMAGE) {
+    if (avctx->codec_id == AV_CODEC_ID_WMV3IMAGE || avctx->codec_id == AV_CODEC_ID_VC1IMAGE) {
  image:
          avctx->width  = avctx->coded_width  = v->output_width;
          avctx->height = avctx->coded_height = v->output_height;
@@ -5632,7 +5651,7 @@ image:
              goto err;
  #endif
          *pict      = v->sprite_output_frame;
-        *data_size = sizeof(AVFrame);
+        *got_frame = 1;
      } else {
          if (s->pict_type == AV_PICTURE_TYPE_B || s->low_delay) {
              *pict = s->current_picture_ptr->f;
@@ -5640,7 +5659,7 @@ image:
              *pict = s->last_picture_ptr->f;
          }
          if (s->last_picture_ptr || s->low_delay) {
-            *data_size = sizeof(AVFrame);
+            *got_frame = 1;
              ff_print_debug_info(s, pict);
          }
      }
@@ -5672,11 +5691,12 @@ static const AVProfile profiles[] = {
  AVCodec ff_vc1_decoder = {
      .name           = "vc1",
      .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = CODEC_ID_VC1,
+    .id             = AV_CODEC_ID_VC1,
      .priv_data_size = sizeof(VC1Context),
      .init           = vc1_decode_init,
-    .close          = vc1_decode_end,
+    .close          = ff_vc1_decode_end,
      .decode         = vc1_decode_frame,
+    .flush          = ff_mpeg_flush,
      .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY,
      .long_name      = NULL_IF_CONFIG_SMALL("SMPTE VC-1"),
      .pix_fmts       = ff_hwaccel_pixfmt_list_420,
@@ -5687,11 +5707,12 @@ AVCodec ff_vc1_decoder = {
  AVCodec ff_wmv3_decoder = {
      .name           = "wmv3",
      .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = CODEC_ID_WMV3,
+    .id             = AV_CODEC_ID_WMV3,
      .priv_data_size = sizeof(VC1Context),
      .init           = vc1_decode_init,
-    .close          = vc1_decode_end,
+    .close          = ff_vc1_decode_end,
      .decode         = vc1_decode_frame,
+    .flush          = ff_mpeg_flush,
      .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY,
      .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 9"),
      .pix_fmts       = ff_hwaccel_pixfmt_list_420,
@@ -5703,14 +5724,14 @@ AVCodec ff_wmv3_decoder = {
  AVCodec ff_wmv3_vdpau_decoder = {
      .name           = "wmv3_vdpau",
      .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = CODEC_ID_WMV3,
+    .id             = AV_CODEC_ID_WMV3,
      .priv_data_size = sizeof(VC1Context),
      .init           = vc1_decode_init,
-    .close          = vc1_decode_end,
+    .close          = ff_vc1_decode_end,
      .decode         = vc1_decode_frame,
      .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
      .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 9 VDPAU"),
-    .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_VDPAU_WMV3, PIX_FMT_NONE },
+    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_VDPAU_WMV3, AV_PIX_FMT_NONE },
      .profiles       = NULL_IF_CONFIG_SMALL(profiles)
  };
  #endif
@@ -5719,14 +5740,14 @@ AVCodec ff_wmv3_vdpau_decoder = {
  AVCodec ff_vc1_vdpau_decoder = {
      .name           = "vc1_vdpau",
      .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = CODEC_ID_VC1,
+    .id             = AV_CODEC_ID_VC1,
      .priv_data_size = sizeof(VC1Context),
      .init           = vc1_decode_init,
-    .close          = vc1_decode_end,
+    .close          = ff_vc1_decode_end,
      .decode         = vc1_decode_frame,
      .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
      .long_name      = NULL_IF_CONFIG_SMALL("SMPTE VC-1 VDPAU"),
-    .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_VDPAU_VC1, PIX_FMT_NONE },
+    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_VDPAU_VC1, AV_PIX_FMT_NONE },
      .profiles       = NULL_IF_CONFIG_SMALL(profiles)
  };
  #endif
@@ -5735,10 +5756,10 @@ AVCodec ff_vc1_vdpau_decoder = {
  AVCodec ff_wmv3image_decoder = {
      .name           = "wmv3image",
      .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = CODEC_ID_WMV3IMAGE,
+    .id             = AV_CODEC_ID_WMV3IMAGE,
      .priv_data_size = sizeof(VC1Context),
      .init           = vc1_decode_init,
-    .close          = vc1_decode_end,
+    .close          = ff_vc1_decode_end,
      .decode         = vc1_decode_frame,
      .capabilities   = CODEC_CAP_DR1,
      .flush          = vc1_sprite_flush,
@@ -5751,10 +5772,10 @@ AVCodec ff_wmv3image_decoder = {
  AVCodec ff_vc1image_decoder = {
      .name           = "vc1image",
      .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = CODEC_ID_VC1IMAGE,
+    .id             = AV_CODEC_ID_VC1IMAGE,
      .priv_data_size = sizeof(VC1Context),
      .init           = vc1_decode_init,
-    .close          = vc1_decode_end,
+    .close          = ff_vc1_decode_end,
      .decode         = vc1_decode_frame,
      .capabilities   = CODEC_CAP_DR1,
      .flush          = vc1_sprite_flush,