Add rule for building API example programs.

[ffmpeg] / libavcodec / vc1.c
diff --git a/libavcodec/vc1.c b/libavcodec/vc1.c

index 9dd6ba801d46360324a6d2d79882d6d7863d9633..229c8850600a9a1ed1ff94d96808614fe3bc1ac7 100644 (file)
--- a/libavcodec/vc1.c
+++ b/libavcodec/vc1.c
@@ -21,10 +21,11 @@
   */
  
  /**
- * @file vc1.c
+ * @file libavcodec/vc1.c
   * VC-1 and WMV3 decoder
   *
   */
+#include "internal.h"
  #include "dsputil.h"
  #include "avcodec.h"
  #include "mpegvideo.h"
@@ -34,6 +35,8 @@
  #include "msmpeg4data.h"
  #include "unary.h"
  #include "simple_idct.h"
+#include "mathops.h"
+#include "vdpau_internal.h"
  
  #undef NDEBUG
  #include <assert.h>
@@ -114,12 +117,12 @@ static int vc1_init_common(VC1Context *v)
  
  /***********************************************************************/
  /**
- * @defgroup bitplane VC9 Bitplane decoding
+ * @defgroup vc1bitplane VC-1 Bitplane decoding
   * @see 8.7, p56
   * @{
   */
  
-/** @addtogroup bitplane
+/**
   * Imode types
   * @{
   */
@@ -175,7 +178,8 @@ static void decode_colskip(uint8_t* plane, int width, int height, int stride, Ge
  }
  
  /** Decode a bitplane's bits
- * @param bp Bitplane where to store the decode bits
+ * @param data bitplane in which to store the decoded bits
+ * @param[out] raw_flag pointer to the flag indicating that this bitplane is not coded explicitly
   * @param v VC-1 context for bit reading and logging
   * @return Status
   * @todo FIXME: Optimize
@@ -310,39 +314,51 @@ static int bitplane_decoding(uint8_t* data, int *raw_flag, VC1Context *v)
  /**
   * VC-1 in-loop deblocking filter for one line
   * @param src source block type
+ * @param stride block stride
   * @param pq block quantizer
   * @return whether other 3 pairs should be filtered or not
   * @see 8.6
   */
-static int vc1_filter_line(uint8_t* src, int stride, int pq){
-    int a0, a1, a2, a3, d, clip, filt3 = 0;
+static av_always_inline int vc1_filter_line(uint8_t* src, int stride, int pq){
      uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
  
-    a0     = (2*(src[-2*stride] - src[ 1*stride]) - 5*(src[-1*stride] - src[ 0*stride]) + 4) >> 3;
-    if(FFABS(a0) < pq){
-        a1 = (2*(src[-4*stride] - src[-1*stride]) - 5*(src[-3*stride] - src[-2*stride]) + 4) >> 3;
-        a2 = (2*(src[ 0*stride] - src[ 3*stride]) - 5*(src[ 1*stride] - src[ 2*stride]) + 4) >> 3;
-        a3 = FFMIN(FFABS(a1), FFABS(a2));
-        if(a3 < FFABS(a0)){
-            d = 5 * ((a0 >=0 ? a3 : -a3) - a0) / 8;
-            clip = (src[-1*stride] - src[ 0*stride])/2;
+    int a0 = (2*(src[-2*stride] - src[ 1*stride]) - 5*(src[-1*stride] - src[ 0*stride]) + 4) >> 3;
+    int a0_sign = a0 >> 31;        /* Store sign */
+    a0 = (a0 ^ a0_sign) - a0_sign; /* a0 = FFABS(a0); */
+    if(a0 < pq){
+        int a1 = FFABS((2*(src[-4*stride] - src[-1*stride]) - 5*(src[-3*stride] - src[-2*stride]) + 4) >> 3);
+        int a2 = FFABS((2*(src[ 0*stride] - src[ 3*stride]) - 5*(src[ 1*stride] - src[ 2*stride]) + 4) >> 3);
+        if(a1 < a0 || a2 < a0){
+            int clip = src[-1*stride] - src[ 0*stride];
+            int clip_sign = clip >> 31;
+            clip = ((clip ^ clip_sign) - clip_sign)>>1;
              if(clip){
-                filt3 = 1;
-                if(clip > 0)
-                    d = av_clip(d, 0, clip);
-                else
-                    d = av_clip(d, clip, 0);
-                src[-1*stride] = cm[src[-1*stride] - d];
-                src[ 0*stride] = cm[src[ 0*stride] + d];
+                int a3 = FFMIN(a1, a2);
+                int d = 5 * (a3 - a0);
+                int d_sign = (d >> 31);
+                d = ((d ^ d_sign) - d_sign) >> 3;
+                d_sign ^= a0_sign;
+
+                if( d_sign ^ clip_sign )
+                    d = 0;
+                else{
+                    d = FFMIN(d, clip);
+                    d = (d ^ d_sign) - d_sign;          /* Restore sign */
+                    src[-1*stride] = cm[src[-1*stride] - d];
+                    src[ 0*stride] = cm[src[ 0*stride] + d];
+                }
+                return 1;
              }
          }
      }
-    return filt3;
+    return 0;
  }
  
  /**
   * VC-1 in-loop deblocking filter
   * @param src source block type
+ * @param step distance between horizontally adjacent elements
+ * @param stride distance between vertically adjacent elements
   * @param len edge length to filter (4 or 8 pixels)
   * @param pq block quantizer
   * @see 8.6
@@ -877,6 +893,8 @@ static int decode_sequence_header(AVCodecContext *avctx, GetBitContext *gb)
          av_log(avctx, AV_LOG_ERROR,
                 "LOOPFILTER shell not be enabled in simple profile\n");
      }
+    if(v->s.avctx->skip_loop_filter >= AVDISCARD_ALL)
+        v->s.loop_filter = 0;
  
      v->res_x8 = get_bits1(gb); //reserved
      v->multires = get_bits1(gb);
@@ -1005,8 +1023,8 @@ static int decode_sequence_header_adv(VC1Context *v, GetBitContext *gb)
      if(get_bits1(gb)) { //Display Info - decoding is not affected by it
          int w, h, ar = 0;
          av_log(v->s.avctx, AV_LOG_DEBUG, "Display extended info:\n");
-        v->s.avctx->width  = v->s.width  = w = get_bits(gb, 14) + 1;
-        v->s.avctx->height = v->s.height = h = get_bits(gb, 14) + 1;
+        v->s.avctx->coded_width  = w = get_bits(gb, 14) + 1;
+        v->s.avctx->coded_height = h = get_bits(gb, 14) + 1;
          av_log(v->s.avctx, AV_LOG_DEBUG, "Display dimensions: %ix%i\n", w, h);
          if(get_bits1(gb))
              ar = get_bits(gb, 4);
@@ -1017,6 +1035,7 @@ static int decode_sequence_header_adv(VC1Context *v, GetBitContext *gb)
              h = get_bits(gb, 8);
              v->s.avctx->sample_aspect_ratio = (AVRational){w, h};
          }
+        av_log(v->s.avctx, AV_LOG_DEBUG, "Aspect: %i:%i\n", v->s.avctx->sample_aspect_ratio.num, v->s.avctx->sample_aspect_ratio.den);
  
          if(get_bits1(gb)){ //framerate stuff
              if(get_bits1(gb)) {
@@ -1057,13 +1076,13 @@ static int decode_sequence_header_adv(VC1Context *v, GetBitContext *gb)
  static int decode_entry_point(AVCodecContext *avctx, GetBitContext *gb)
  {
      VC1Context *v = avctx->priv_data;
-    int i, blink, clentry, refdist;
+    int i;
  
      av_log(avctx, AV_LOG_DEBUG, "Entry point: %08X\n", show_bits_long(gb, 32));
-    blink = get_bits1(gb); // broken link
-    clentry = get_bits1(gb); // closed entry
+    v->broken_link = get_bits1(gb);
+    v->closed_entry = get_bits1(gb);
      v->panscanflag = get_bits1(gb);
-    refdist = get_bits1(gb); // refdist flag
+    v->refdist_flag = get_bits1(gb);
      v->s.loop_filter = get_bits1(gb);
      v->fastuvmc = get_bits1(gb);
      v->extended_mv = get_bits1(gb);
@@ -1084,20 +1103,20 @@ static int decode_entry_point(AVCodecContext *avctx, GetBitContext *gb)
      }
      if(v->extended_mv)
          v->extended_dmv = get_bits1(gb);
-    if(get_bits1(gb)) {
+    if((v->range_mapy_flag = get_bits1(gb))) {
          av_log(avctx, AV_LOG_ERROR, "Luma scaling is not supported, expect wrong picture\n");
-        skip_bits(gb, 3); // Y range, ignored for now
+        v->range_mapy = get_bits(gb, 3);
      }
-    if(get_bits1(gb)) {
+    if((v->range_mapuv_flag = get_bits1(gb))) {
          av_log(avctx, AV_LOG_ERROR, "Chroma scaling is not supported, expect wrong picture\n");
-        skip_bits(gb, 3); // UV range, ignored for now
+        v->range_mapuv = get_bits(gb, 3);
      }
  
      av_log(avctx, AV_LOG_DEBUG, "Entry point info:\n"
          "BrokenLink=%i, ClosedEntry=%i, PanscanFlag=%i\n"
          "RefDist=%i, Postproc=%i, FastUVMC=%i, ExtMV=%i\n"
          "DQuant=%i, VSTransform=%i, Overlap=%i, Qmode=%i\n",
-        blink, clentry, v->panscanflag, refdist, v->s.loop_filter,
+        v->broken_link, v->closed_entry, v->panscanflag, v->refdist_flag, v->s.loop_filter,
          v->fastuvmc, v->extended_mv, v->dquant, v->vstransform, v->overlap, v->quantizer_mode);
  
      return 0;
@@ -1121,8 +1140,8 @@ static int vc1_parse_frame_header(VC1Context *v, GetBitContext* gb)
  
      v->bi_type = 0;
      if(v->s.pict_type == FF_B_TYPE) {
-        v->bfraction = get_vlc2(gb, ff_vc1_bfraction_vlc.table, VC1_BFRACTION_VLC_BITS, 1);
-        v->bfraction = ff_vc1_bfraction_lut[v->bfraction];
+        v->bfraction_lut_index = get_vlc2(gb, ff_vc1_bfraction_vlc.table, VC1_BFRACTION_VLC_BITS, 1);
+        v->bfraction = ff_vc1_bfraction_lut[v->bfraction_lut_index];
          if(v->bfraction == 0) {
              v->s.pict_type = FF_BI_TYPE;
          }
@@ -1160,12 +1179,7 @@ static int vc1_parse_frame_header(VC1Context *v, GetBitContext* gb)
      v->k_y = v->mvrange + 8; //k_y can be 8 9 10 11
      v->range_x = 1 << (v->k_x - 1);
      v->range_y = 1 << (v->k_y - 1);
-    if (v->profile == PROFILE_ADVANCED)
-    {
-        if (v->postprocflag) v->postproc = get_bits1(gb);
-    }
-    else
-        if (v->multires && v->s.pict_type != FF_B_TYPE) v->respic = get_bits(gb, 2);
+    if (v->multires && v->s.pict_type != FF_B_TYPE) v->respic = get_bits(gb, 2);
  
      if(v->res_x8 && (v->s.pict_type == FF_I_TYPE || v->s.pict_type == FF_BI_TYPE)){
          v->x8_type = get_bits1(gb);
@@ -1368,8 +1382,8 @@ static int vc1_parse_frame_header_adv(VC1Context *v, GetBitContext* gb)
          v->uvsamp = get_bits1(gb);
      if(v->finterpflag) v->interpfrm = get_bits1(gb);
      if(v->s.pict_type == FF_B_TYPE) {
-        v->bfraction = get_vlc2(gb, ff_vc1_bfraction_vlc.table, VC1_BFRACTION_VLC_BITS, 1);
-        v->bfraction = ff_vc1_bfraction_lut[v->bfraction];
+        v->bfraction_lut_index = get_vlc2(gb, ff_vc1_bfraction_vlc.table, VC1_BFRACTION_VLC_BITS, 1);
+        v->bfraction = ff_vc1_bfraction_lut[v->bfraction_lut_index];
          if(v->bfraction == 0) {
              v->s.pict_type = FF_BI_TYPE; /* XXX: should not happen here */
          }
@@ -1392,6 +1406,8 @@ static int vc1_parse_frame_header_adv(VC1Context *v, GetBitContext* gb)
      else v->halfpq = 0;
      if (v->quantizer_mode == QUANT_FRAME_EXPLICIT)
          v->pquantizer = get_bits1(gb);
+    if(v->postprocflag)
+        v->postproc = get_bits(gb, 2);
  
      if(v->s.pict_type == FF_I_TYPE || v->s.pict_type == FF_P_TYPE) v->use_ic = 0;
  
@@ -1414,8 +1430,6 @@ static int vc1_parse_frame_header_adv(VC1Context *v, GetBitContext* gb)
          }
          break;
      case FF_P_TYPE:
-        if(v->postprocflag)
-            v->postproc = get_bits1(gb);
          if (v->extended_mv) v->mvrange = get_unary(gb, 0, 3);
          else v->mvrange = 0;
          v->k_x = v->mvrange + 9 + (v->mvrange >> 1); //k_x can be 9 10 12 13
@@ -1505,8 +1519,6 @@ static int vc1_parse_frame_header_adv(VC1Context *v, GetBitContext* gb)
          }
          break;
      case FF_B_TYPE:
-        if(v->postprocflag)
-            v->postproc = get_bits1(gb);
          if (v->extended_mv) v->mvrange = get_unary(gb, 0, 3);
          else v->mvrange = 0;
          v->k_x = v->mvrange + 9 + (v->mvrange >> 1); //k_x can be 9 10 12 13
@@ -1579,7 +1591,7 @@ static int vc1_parse_frame_header_adv(VC1Context *v, GetBitContext* gb)
  
  /***********************************************************************/
  /**
- * @defgroup block VC-1 Block-level functions
+ * @defgroup vc1block VC-1 Block-level functions
   * @see 7.1.4, p91 and 8.1.1.7, p(1)04
   * @{
   */
@@ -2165,6 +2177,8 @@ static inline void vc1_pred_b_mv(VC1Context *v, int dmv_x[2], int dmv_y[2], int
  /** Get predicted DC value for I-frames only
   * prediction dir: left=0, top=1
   * @param s MpegEncContext
+ * @param overlap flag indicating that overlap filtering is used
+ * @param pq integer part of picture quantizer
   * @param[in] n block index in the current MB
   * @param dc_val_ptr Pointer to DC predictor
   * @param dir_ptr Prediction direction for use in AC prediction
@@ -2225,7 +2239,11 @@ static inline int vc1_i_pred_dc(MpegEncContext *s, int overlap, int pq, int n,
  /** Get predicted DC value
   * prediction dir: left=0, top=1
   * @param s MpegEncContext
+ * @param overlap flag indicating that overlap filtering is used
+ * @param pq integer part of picture quantizer
   * @param[in] n block index in the current MB
+ * @param a_avail flag indicating top block availability
+ * @param c_avail flag indicating left block availability
   * @param dc_val_ptr Pointer to DC predictor
   * @param dir_ptr Prediction direction for use in AC prediction
   */
@@ -2296,9 +2314,10 @@ static inline int vc1_pred_dc(MpegEncContext *s, int overlap, int pq, int n,
      return pred;
  }
  
+/** @} */ // Block group
  
  /**
- * @defgroup std_mb VC1 Macroblock-level functions in Simple/Main Profiles
+ * @defgroup vc1_std_mb VC1 Macroblock-level functions in Simple/Main Profiles
   * @see 7.1.4, p91 and 8.1.1.7, p(1)04
   * @{
   */
@@ -2335,6 +2354,7 @@ static inline int vc1_coded_block_pred(MpegEncContext * s, int n, uint8_t **code
   * @param last Last coefficient
   * @param skip How much zero coefficients to skip
   * @param value Decoded AC coefficient value
+ * @param codingset set of VLC to decode data
   * @see 8.1.3.4
   */
  static void vc1_decode_ac_coeff(VC1Context *v, int *last, int *skip, int *value, int codingset)
@@ -2398,6 +2418,7 @@ static void vc1_decode_ac_coeff(VC1Context *v, int *last, int *skip, int *value,
  /** Decode intra block in intra frames - should be faster than decode_intra_block
   * @param v VC1Context
   * @param block block to decode
+ * @param[in] n subblock index
   * @param coded are AC coeffs present or not
   * @param codingset set of VLC to decode data
   */
@@ -2563,8 +2584,10 @@ not_coded:
  /** Decode intra block in intra frames - should be faster than decode_intra_block
   * @param v VC1Context
   * @param block block to decode
+ * @param[in] n subblock number
   * @param coded are AC coeffs present or not
   * @param codingset set of VLC to decode data
+ * @param mquant quantizer value for this macroblock
   */
  static int vc1_decode_i_block_adv(VC1Context *v, DCTELEM block[64], int n, int coded, int codingset, int mquant)
  {
@@ -2759,6 +2782,7 @@ static int vc1_decode_i_block_adv(VC1Context *v, DCTELEM block[64], int n, int c
  /** Decode intra block in inter frames - more generic version than vc1_decode_i_block
   * @param v VC1Context
   * @param block block to decode
+ * @param[in] n subblock index
   * @param coded are AC coeffs present or not
   * @param mquant block quantizer
   * @param codingset set of VLC to decode data
@@ -3096,6 +3120,10 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan
      return pat;
  }
  
+/** @} */ // Macroblock group
+
+static const int size_table  [6] = { 0, 2, 3, 4,  5,  8 };
+static const int offset_table[6] = { 0, 1, 3, 7, 15, 31 };
  
  /** Decode one P-frame MB (in Simple/Main profile)
   */
@@ -3109,8 +3137,6 @@ static int vc1_decode_p_mb(VC1Context *v)
      int mqdiff, mquant; /* MB quantization */
      int ttmb = v->ttfrm; /* MB Transform type */
  
-    static const int size_table[6] = { 0, 2, 3, 4, 5, 8 },
-      offset_table[6] = { 0, 1, 3, 7, 15, 31 };
      int mb_has_coeffs = 1; /* last_flag */
      int dmv_x, dmv_y; /* Differential MV components */
      int index, index1; /* LUT indexes */
@@ -3119,6 +3145,7 @@ static int vc1_decode_p_mb(VC1Context *v)
      int dst_idx, off;
      int skipped, fourmv;
      int block_cbp = 0, pat;
+    int apply_loop_filter;
  
      mquant = v->pq; /* Loosy initialization */
  
@@ -3133,6 +3160,7 @@ static int vc1_decode_p_mb(VC1Context *v)
  
      s->dsp.clear_blocks(s->block[0]);
  
+    apply_loop_filter = s->loop_filter && !(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY);
      if (!fourmv) /* 1MV mode */
      {
          if (!skipped)
@@ -3197,14 +3225,14 @@ static int vc1_decode_p_mb(VC1Context *v)
                          if(v->a_avail)
                              s->dsp.vc1_v_overlap(s->dest[dst_idx] + off, s->linesize >> ((i & 4) >> 2));
                      }
-                    if(v->s.loop_filter && s->mb_x && s->mb_x != (s->mb_width - 1) && s->mb_y && s->mb_y != (s->mb_height - 1)){
+                    if(apply_loop_filter && s->mb_x && s->mb_x != (s->mb_width - 1) && s->mb_y && s->mb_y != (s->mb_height - 1)){
                          int left_cbp, top_cbp;
                          if(i & 4){
                              left_cbp = v->cbp[s->mb_x - 1]            >> (i * 4);
                              top_cbp  = v->cbp[s->mb_x - s->mb_stride] >> (i * 4);
                          }else{
-                            left_cbp = (i & 1) ? (pat >> ((i-1)*4)) : (v->cbp[s->mb_x - 1]           >> ((i+1)*4));
-                            top_cbp  = (i & 2) ? (pat >> ((i-2)*4)) : (v->cbp[s->mb_x - s->mb_stride] >> ((i+2)*4));
+                            left_cbp = (i & 1) ? (cbp >> ((i-1)*4)) : (v->cbp[s->mb_x - 1]           >> ((i+1)*4));
+                            top_cbp  = (i & 2) ? (cbp >> ((i-2)*4)) : (v->cbp[s->mb_x - s->mb_stride] >> ((i+2)*4));
                          }
                          if(left_cbp & 0xC)
                              vc1_loop_filter(s->dest[dst_idx] + off, 1, i & 4 ? s->uvlinesize : s->linesize, 8, mquant);
@@ -3214,15 +3242,19 @@ static int vc1_decode_p_mb(VC1Context *v)
                      block_cbp |= 0xF << (i << 2);
                  } else if(val) {
                      int left_cbp = 0, top_cbp = 0, filter = 0;
-                    if(v->s.loop_filter && s->mb_x && s->mb_x != (s->mb_width - 1) && s->mb_y && s->mb_y != (s->mb_height - 1)){
+                    if(apply_loop_filter && s->mb_x && s->mb_x != (s->mb_width - 1) && s->mb_y && s->mb_y != (s->mb_height - 1)){
                          filter = 1;
                          if(i & 4){
                              left_cbp = v->cbp[s->mb_x - 1]            >> (i * 4);
                              top_cbp  = v->cbp[s->mb_x - s->mb_stride] >> (i * 4);
                          }else{
-                            left_cbp = (i & 1) ? (pat >> ((i-1)*4)) : (v->cbp[s->mb_x - 1]           >> ((i+1)*4));
-                            top_cbp  = (i & 2) ? (pat >> ((i-2)*4)) : (v->cbp[s->mb_x - s->mb_stride] >> ((i+2)*4));
+                            left_cbp = (i & 1) ? (cbp >> ((i-1)*4)) : (v->cbp[s->mb_x - 1]           >> ((i+1)*4));
+                            top_cbp  = (i & 2) ? (cbp >> ((i-2)*4)) : (v->cbp[s->mb_x - s->mb_stride] >> ((i+2)*4));
                          }
+                        if(left_cbp & 0xC)
+                            vc1_loop_filter(s->dest[dst_idx] + off, 1, i & 4 ? s->uvlinesize : s->linesize, 8, mquant);
+                        if(top_cbp  & 0xA)
+                            vc1_loop_filter(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, 1, 8, mquant);
                      }
                      pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block, s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize, (i&4) && (s->flags & CODEC_FLAG_GRAY), filter, left_cbp, top_cbp);
                      block_cbp |= pat << (i << 2);
@@ -3330,8 +3362,8 @@ static int vc1_decode_p_mb(VC1Context *v)
                              left_cbp = v->cbp[s->mb_x - 1]            >> (i * 4);
                              top_cbp  = v->cbp[s->mb_x - s->mb_stride] >> (i * 4);
                          }else{
-                            left_cbp = (i & 1) ? (pat >> ((i-1)*4)) : (v->cbp[s->mb_x - 1]           >> ((i+1)*4));
-                            top_cbp  = (i & 2) ? (pat >> ((i-2)*4)) : (v->cbp[s->mb_x - s->mb_stride] >> ((i+2)*4));
+                            left_cbp = (i & 1) ? (cbp >> ((i-1)*4)) : (v->cbp[s->mb_x - 1]           >> ((i+1)*4));
+                            top_cbp  = (i & 2) ? (cbp >> ((i-2)*4)) : (v->cbp[s->mb_x - s->mb_stride] >> ((i+2)*4));
                          }
                          if(left_cbp & 0xC)
                              vc1_loop_filter(s->dest[dst_idx] + off, 1, i & 4 ? s->uvlinesize : s->linesize, 8, mquant);
@@ -3347,9 +3379,13 @@ static int vc1_decode_p_mb(VC1Context *v)
                              left_cbp = v->cbp[s->mb_x - 1]            >> (i * 4);
                              top_cbp  = v->cbp[s->mb_x - s->mb_stride] >> (i * 4);
                          }else{
-                            left_cbp = (i & 1) ? (pat >> ((i-1)*4)) : (v->cbp[s->mb_x - 1]           >> ((i+1)*4));
-                            top_cbp  = (i & 2) ? (pat >> ((i-2)*4)) : (v->cbp[s->mb_x - s->mb_stride] >> ((i+2)*4));
+                            left_cbp = (i & 1) ? (cbp >> ((i-1)*4)) : (v->cbp[s->mb_x - 1]           >> ((i+1)*4));
+                            top_cbp  = (i & 2) ? (cbp >> ((i-2)*4)) : (v->cbp[s->mb_x - s->mb_stride] >> ((i+2)*4));
                          }
+                        if(left_cbp & 0xC)
+                            vc1_loop_filter(s->dest[dst_idx] + off, 1, i & 4 ? s->uvlinesize : s->linesize, 8, mquant);
+                        if(top_cbp  & 0xA)
+                            vc1_loop_filter(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, 1, 8, mquant);
                      }
                      pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block, s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize, (i&4) && (s->flags & CODEC_FLAG_GRAY), filter, left_cbp, top_cbp);
                      block_cbp |= pat << (i << 2);
@@ -3394,9 +3430,6 @@ static void vc1_decode_b_mb(VC1Context *v)
      int cbp = 0; /* cbp decoding stuff */
      int mqdiff, mquant; /* MB quantization */
      int ttmb = v->ttfrm; /* MB Transform type */
-
-    static const int size_table[6] = { 0, 2, 3, 4, 5, 8 },
-      offset_table[6] = { 0, 1, 3, 7, 15, 31 };
      int mb_has_coeffs = 0; /* last_flag */
      int index, index1; /* LUT indexes */
      int val, sign; /* temp values */
@@ -3977,9 +4010,10 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
  
      if (!avctx->extradata_size || !avctx->extradata) return -1;
      if (!(avctx->flags & CODEC_FLAG_GRAY))
-        avctx->pix_fmt = PIX_FMT_YUV420P;
+        avctx->pix_fmt = avctx->get_format(avctx, avctx->codec->pix_fmts);
      else
          avctx->pix_fmt = PIX_FMT_GRAY8;
+    avctx->hwaccel = ff_find_hwaccel(avctx->codec->id, avctx->pix_fmt);
      v->s.avctx = avctx;
      avctx->flags |= CODEC_FLAG_EMU_EDGE;
      v->s.flags |= CODEC_FLAG_EMU_EDGE;
@@ -4109,6 +4143,7 @@ static int vc1_decode_frame(AVCodecContext *avctx,
      MpegEncContext *s = &v->s;
      AVFrame *pict = data;
      uint8_t *buf2 = NULL;
+    const uint8_t *buf_start = buf;
  
      /* no supplementary picture */
      if (buf_size == 0) {
@@ -4130,6 +4165,13 @@ static int vc1_decode_frame(AVCodecContext *avctx,
          s->current_picture_ptr= &s->picture[i];
      }
  
+    if (s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
+        if (v->profile < PROFILE_ADVANCED)
+            avctx->pix_fmt = PIX_FMT_VDPAU_WMV3;
+        else
+            avctx->pix_fmt = PIX_FMT_VDPAU_VC1;
+    }
+
      //for advanced profile we may need to parse and unescape data
      if (avctx->codec_id == CODEC_ID_VC1) {
          int buf_size2 = 0;
@@ -4146,6 +4188,9 @@ static int vc1_decode_frame(AVCodecContext *avctx,
                  if(size <= 0) continue;
                  switch(AV_RB32(start)){
                  case VC1_CODE_FRAME:
+                    if (avctx->hwaccel ||
+                        s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
+                        buf_start = start;
                      buf_size2 = vc1_unescape_buffer(start + 4, size, buf2);
                      break;
                  case VC1_CODE_ENTRYPOINT: /* it should be before frame data */
@@ -4234,14 +4279,26 @@ static int vc1_decode_frame(AVCodecContext *avctx,
      s->me.qpel_put= s->dsp.put_qpel_pixels_tab;
      s->me.qpel_avg= s->dsp.avg_qpel_pixels_tab;
  
-    ff_er_frame_start(s);
+    if ((CONFIG_VC1_VDPAU_DECODER || CONFIG_WMV3_VDPAU_DECODER)
+        &&s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
+        ff_vdpau_vc1_decode_picture(s, buf_start, (buf + buf_size) - buf_start);
+    else if (avctx->hwaccel) {
+        if (avctx->hwaccel->start_frame(avctx, buf, buf_size) < 0)
+            return -1;
+        if (avctx->hwaccel->decode_slice(avctx, buf_start, (buf + buf_size) - buf_start) < 0)
+            return -1;
+        if (avctx->hwaccel->end_frame(avctx) < 0)
+            return -1;
+    } else {
+        ff_er_frame_start(s);
  
-    v->bits = buf_size * 8;
-    vc1_decode_blocks(v);
+        v->bits = buf_size * 8;
+        vc1_decode_blocks(v);
  //av_log(s->avctx, AV_LOG_INFO, "Consumed %i/%i bits\n", get_bits_count(&s->gb), buf_size*8);
  //  if(get_bits_count(&s->gb) > buf_size * 8)
  //      return -1;
-    ff_er_frame_end(s);
+        ff_er_frame_end(s);
+    }
  
      MPV_frame_end(s);
  
@@ -4300,6 +4357,7 @@ AVCodec vc1_decoder = {
      CODEC_CAP_DELAY,
      NULL,
      .long_name = NULL_IF_CONFIG_SMALL("SMPTE VC-1"),
+    .pix_fmts = ff_hwaccel_pixfmt_list_420
  };
  
  AVCodec wmv3_decoder = {
@@ -4314,4 +4372,39 @@ AVCodec wmv3_decoder = {
      CODEC_CAP_DELAY,
      NULL,
      .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 9"),
+    .pix_fmts = ff_hwaccel_pixfmt_list_420
+};
+
+#if CONFIG_WMV3_VDPAU_DECODER
+AVCodec wmv3_vdpau_decoder = {
+    "wmv3_vdpau",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_WMV3,
+    sizeof(VC1Context),
+    vc1_decode_init,
+    NULL,
+    vc1_decode_end,
+    vc1_decode_frame,
+    CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
+    NULL,
+    .long_name = NULL_IF_CONFIG_SMALL("Windows Media Video 9 VDPAU"),
+    .pix_fmts = (enum PixelFormat[]){PIX_FMT_VDPAU_WMV3, PIX_FMT_NONE}
  };
+#endif
+
+#if CONFIG_VC1_VDPAU_DECODER
+AVCodec vc1_vdpau_decoder = {
+    "vc1_vdpau",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_VC1,
+    sizeof(VC1Context),
+    vc1_decode_init,
+    NULL,
+    vc1_decode_end,
+    vc1_decode_frame,
+    CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
+    NULL,
+    .long_name = NULL_IF_CONFIG_SMALL("SMPTE VC-1 VDPAU"),
+    .pix_fmts = (enum PixelFormat[]){PIX_FMT_VDPAU_VC1, PIX_FMT_NONE}
+};
+#endif