quantizer noise shaping

[ffmpeg] / libavcodec / mpegvideo.c
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c

index c9fbca0c17add59edeedde4e4e5d6bec7574215d..320fdb2c57806053a7f6d64a5f6442999777d31f 100644 (file)
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -1,6 +1,7 @@
  /*
   * The simplest mpeg encoder (well, it was the simplest!)
   * Copyright (c) 2000,2001 Fabrice Bellard.
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   *
   * This library is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
@@ -25,6 +26,7 @@
   */ 
   
  #include <limits.h>
+#include <math.h> //for PI
  #include "avcodec.h"
  #include "dsputil.h"
  #include "mpegvideo.h"
@@ -40,17 +42,25 @@
  #ifdef CONFIG_ENCODERS
  static void encode_picture(MpegEncContext *s, int picture_number);
  #endif //CONFIG_ENCODERS
-static void dct_unquantize_mpeg1_c(MpegEncContext *s, 
+static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s, 
                                     DCTELEM *block, int n, int qscale);
-static void dct_unquantize_mpeg2_c(MpegEncContext *s,
+static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s, 
                                     DCTELEM *block, int n, int qscale);
-static void dct_unquantize_h263_c(MpegEncContext *s, 
+static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
+                                   DCTELEM *block, int n, int qscale);
+static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
+                                   DCTELEM *block, int n, int qscale);
+static void dct_unquantize_h263_intra_c(MpegEncContext *s, 
+                                  DCTELEM *block, int n, int qscale);
+static void dct_unquantize_h263_inter_c(MpegEncContext *s, 
                                    DCTELEM *block, int n, int qscale);
  static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
  #ifdef CONFIG_ENCODERS
  static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
  static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
+static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
  static int sse_mb(MpegEncContext *s);
+static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
  #endif //CONFIG_ENCODERS
  
  #ifdef HAVE_XVMC
@@ -204,12 +214,16 @@ void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
  /* init common dct for both encoder and decoder */
  int DCT_common_init(MpegEncContext *s)
  {
-    s->dct_unquantize_h263 = dct_unquantize_h263_c;
-    s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_c;
-    s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_c;
+    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
+    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
+    s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
+    s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
+    s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
+    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
  
  #ifdef CONFIG_ENCODERS
      s->dct_quantize= dct_quantize_c;
+    s->denoise_dct= denoise_dct_c;
  #endif
          
  #ifdef HAVE_MMX
@@ -243,8 +257,13 @@ int DCT_common_init(MpegEncContext *s)
      /* load & permutate scantables
         note: only wmv uses differnt ones 
      */
-    ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
-    ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
+    if(s->alternate_scan){
+        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
+        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
+    }else{
+        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
+        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
+    }
      ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
      ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
  
@@ -258,6 +277,17 @@ static void copy_picture(Picture *dst, Picture *src){
      dst->type= FF_BUFFER_TYPE_COPY;
  }
  
+static void copy_picture_attributes(AVFrame *dst, AVFrame *src){
+    dst->pict_type              = src->pict_type;
+    dst->quality                = src->quality;
+    dst->coded_picture_number   = src->coded_picture_number;
+    dst->display_picture_number = src->display_picture_number;
+//    dst->reference              = src->reference;
+    dst->pts                    = src->pts;
+    dst->interlaced_frame       = src->interlaced_frame;
+    dst->top_field_first        = src->top_field_first;
+}
+
  /**
   * allocates a Picture
   * The pixels are allocated/set by calling get_buffer() if shared=0
@@ -308,18 +338,18 @@ static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
  
          CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
          CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
-        CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(int))
+        CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
          pic->mb_type= pic->mb_type_base + s->mb_stride+1;
          if(s->out_format == FMT_H264){
              for(i=0; i<2; i++){
-                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+1)  * sizeof(uint16_t))
+                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+1)  * sizeof(int16_t))
                  pic->motion_val[i]= pic->motion_val_base[i]+1;
                  CHECKED_ALLOCZ(pic->ref_index[i] , b8_array_size * sizeof(uint8_t))
              }
              pic->motion_subsample_log2= 2;
-        }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&(FF_DEBUG_VIS_MV|FF_DEBUG_MV))){
+        }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
              for(i=0; i<2; i++){
-                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+1) * sizeof(uint16_t)*2) //FIXME
+                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+1) * sizeof(int16_t)*2) //FIXME
                  pic->motion_val[i]= pic->motion_val_base[i]+1;
              }
              pic->motion_subsample_log2= 3;
@@ -374,12 +404,13 @@ static void free_picture(MpegEncContext *s, Picture *pic){
  /* init common structure for both encoder and decoder */
  int MPV_common_init(MpegEncContext *s)
  {
-    int y_size, c_size, yc_size, i, mb_array_size, x, y;
+    int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
  
      dsputil_init(&s->dsp, s->avctx);
      DCT_common_init(s);
  
      s->flags= s->avctx->flags;
+    s->flags2= s->avctx->flags2;
  
      s->mb_width  = (s->width  + 15) / 16;
      s->mb_height = (s->height + 15) / 16;
@@ -387,6 +418,7 @@ int MPV_common_init(MpegEncContext *s)
      s->b8_stride = s->mb_width*2 + 1;
      s->b4_stride = s->mb_width*4 + 1;
      mb_array_size= s->mb_height * s->mb_stride;
+    mv_table_size= (s->mb_height+2) * s->mb_stride + 1;
  
      /* set default edge pos, will be overriden in decode_header if needed */
      s->h_edge_pos= s->mb_width*16;
@@ -404,9 +436,11 @@ int MPV_common_init(MpegEncContext *s)
      s->y_dc_scale_table=
      s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
      s->chroma_qscale_table= ff_default_chroma_qscale_table;
-    s->progressive_sequence= 
+    if (!s->encoding)
+        s->progressive_sequence= 1;
      s->progressive_frame= 1;
-    
+    s->coded_picture_number = 0;
+
      y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
      c_size = (s->mb_width + 2) * (s->mb_height + 2);
      yc_size = y_size + 2 * c_size;
@@ -436,8 +470,6 @@ int MPV_common_init(MpegEncContext *s)
      s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
      
      if (s->encoding) {
-        int mv_table_size= s->mb_stride * (s->mb_height+2) + 1;
-
          /* Allocate MV tables */
          CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
          CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
@@ -469,7 +501,7 @@ int MPV_common_init(MpegEncContext *s)
          CHECKED_ALLOCZ(s->avctx->stats_out, 256);
  
          /* Allocate MB type table */
-        CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint8_t)) //needed for encoding
+        CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
          
          CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
          
@@ -491,10 +523,21 @@ int MPV_common_init(MpegEncContext *s)
  
      CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
      
-    if(s->codec_id==CODEC_ID_MPEG4){
+    if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
          /* interlaced direct mode decoding tables */
-        CHECKED_ALLOCZ(s->field_mv_table, mb_array_size*2*2 * sizeof(int16_t))
-        CHECKED_ALLOCZ(s->field_select_table, mb_array_size*2* sizeof(int8_t))
+            for(i=0; i<2; i++){
+                int j, k;
+                for(j=0; j<2; j++){
+                    for(k=0; k<2; k++){
+                        CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
+                        s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
+                    }
+                    CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
+                    CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
+                    s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
+                }
+                CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
+            }
      }
      if (s->out_format == FMT_H263) {
          /* ac values */
@@ -542,6 +585,11 @@ int MPV_common_init(MpegEncContext *s)
      }
  
      s->parse_context.state= -1;
+    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
+       s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
+       s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
+       s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
+    }
  
      s->context_initialized = 1;
      return 0;
@@ -556,7 +604,7 @@ int MPV_common_init(MpegEncContext *s)
  /* init common structure for both encoder and decoder */
  void MPV_common_end(MpegEncContext *s)
  {
-    int i;
+    int i, j, k;
  
      av_freep(&s->parse_context.buffer);
      s->parse_context.buffer_size=0;
@@ -574,6 +622,18 @@ void MPV_common_end(MpegEncContext *s)
      s->b_bidir_forw_mv_table= NULL;
      s->b_bidir_back_mv_table= NULL;
      s->b_direct_mv_table= NULL;
+    for(i=0; i<2; i++){
+        for(j=0; j<2; j++){
+            for(k=0; k<2; k++){
+                av_freep(&s->b_field_mv_table_base[i][j][k]);
+                s->b_field_mv_table[i][j][k]=NULL;
+            }
+            av_freep(&s->b_field_select_table[i][j]);
+            av_freep(&s->p_field_mv_table_base[i][j]);
+            s->p_field_mv_table[i][j]=NULL;
+        }
+        av_freep(&s->p_field_select_table[i]);
+    }
      
      av_freep(&s->dc_val[0]);
      av_freep(&s->ac_val[0]);
@@ -591,8 +651,6 @@ void MPV_common_end(MpegEncContext *s)
      av_freep(&s->tex_pb_buffer);
      av_freep(&s->pb2_buffer);
      av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
-    av_freep(&s->field_mv_table);
-    av_freep(&s->field_select_table);
      av_freep(&s->avctx->stats_out);
      av_freep(&s->ac_stats);
      av_freep(&s->error_status_table);
@@ -619,6 +677,9 @@ void MPV_common_end(MpegEncContext *s)
      s->last_picture_ptr=
      s->next_picture_ptr=
      s->current_picture_ptr= NULL;
+    for(i=0; i<3; i++)
+        if (s->visualization_buffer[i])
+            av_free(s->visualization_buffer[i]);
  }
  
  #ifdef CONFIG_ENCODERS
@@ -633,7 +694,6 @@ int MPV_encode_init(AVCodecContext *avctx)
      avctx->pix_fmt = PIX_FMT_YUV420P; // FIXME
  
      s->bit_rate = avctx->bit_rate;
-    s->bit_rate_tolerance = avctx->bit_rate_tolerance;
      s->width = avctx->width;
      s->height = avctx->height;
      if(avctx->gop_size > 600){
@@ -641,13 +701,10 @@ int MPV_encode_init(AVCodecContext *avctx)
          avctx->gop_size=600;
      }
      s->gop_size = avctx->gop_size;
-    s->max_qdiff= avctx->max_qdiff;
-    s->qcompress= avctx->qcompress;
-    s->qblur= avctx->qblur;
      s->avctx = avctx;
      s->flags= avctx->flags;
+    s->flags2= avctx->flags2;
      s->max_b_frames= avctx->max_b_frames;
-    s->b_frame_strategy= avctx->b_frame_strategy;
      s->codec_id= avctx->codec->id;
      s->luma_elim_threshold  = avctx->luma_elim_threshold;
      s->chroma_elim_threshold= avctx->chroma_elim_threshold;
@@ -667,7 +724,7 @@ int MPV_encode_init(AVCodecContext *avctx)
      s->me_method = avctx->me_method;
  
      /* Fixed QSCALE */
-    s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE);
+    s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
      
      s->adaptive_quant= (   s->avctx->lumi_masking
                          || s->avctx->dark_masking
@@ -677,17 +734,25 @@ int MPV_encode_init(AVCodecContext *avctx)
                          || (s->flags&CODEC_FLAG_QP_RD))
                         && !s->fixed_qscale;
      
-    s->progressive_sequence= !(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
-    
-    s->obmc= (s->flags & CODEC_FLAG_OBMC);
-    s->loop_filter= (s->flags & CODEC_FLAG_LOOP_FILTER);
+    s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
+    s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
+    s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
+
+    if(avctx->rc_max_rate && !avctx->rc_buffer_size){
+        av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
+        return -1;
+    }    
  
+    if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
+        av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isnt recommanded!\n");
+    }    
+        
      if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4 
         && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
          av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
          return -1;
      }
-    
+        
      if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
          av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decission\n");
          return -1;
@@ -728,6 +793,19 @@ int MPV_encode_init(AVCodecContext *avctx)
          return -1;
      }
      
+    if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
+        av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet\n");
+        return -1;
+    }
+
+    i= ff_gcd(avctx->frame_rate, avctx->frame_rate_base);
+    if(i > 1){
+        av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
+        avctx->frame_rate /= i;
+        avctx->frame_rate_base /= i;
+//        return -1;
+    }
+    
      if(s->codec_id==CODEC_ID_MJPEG){
          s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
          s->inter_quant_bias= 0;
@@ -801,8 +879,6 @@ int MPV_encode_init(AVCodecContext *avctx)
         s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
         s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
          s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
-        if(s->modified_quant)
-            s->chroma_qscale_table= ff_h263_chroma_qscale_table;
  
         /* /Fx */
          /* These are just to be sure */
@@ -881,7 +957,7 @@ int MPV_encode_init(AVCodecContext *avctx)
      default:
          return -1;
      }
-    
+
      { /* set up some save defaults, some codecs might override them later */
          static int done=0;
          if(!done){
@@ -909,6 +985,13 @@ int MPV_encode_init(AVCodecContext *avctx)
      /* init */
      if (MPV_common_init(s) < 0)
          return -1;
+
+    if(s->modified_quant)
+        s->chroma_qscale_table= ff_h263_chroma_qscale_table;
+    s->progressive_frame= 
+    s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
+    
+    ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
      
      ff_init_me(s);
  
@@ -958,8 +1041,8 @@ int MPV_encode_init(AVCodecContext *avctx)
          return -1;
  
      s->picture_number = 0;
+    s->input_picture_number = 0;
      s->picture_in_gop_number = 0;
-    s->fake_picture_number = 0;
      /* motion detector init */
      s->f_code = 1;
      s->b_code = 1;
@@ -1136,8 +1219,7 @@ alloc:
  
          pic->reference= s->pict_type != B_TYPE ? 3 : 0;
  
-        if(s->current_picture_ptr) //FIXME broken, we need a coded_picture_number in MpegEncContext
-            pic->coded_picture_number= s->current_picture_ptr->coded_picture_number+1;
+        pic->coded_picture_number= s->coded_picture_number++;
          
          if( alloc_picture(s, (Picture*)pic, 0) < 0)
              return -1;
@@ -1189,12 +1271,16 @@ alloc:
  
      /* set dequantizer, we cant do it during init as it might change for mpeg4
         and we cant do it in the header decode as init isnt called for mpeg4 there yet */
-    if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO) 
-        s->dct_unquantize = s->dct_unquantize_mpeg2;
-    else if(s->out_format == FMT_H263)
-        s->dct_unquantize = s->dct_unquantize_h263;
-    else 
-        s->dct_unquantize = s->dct_unquantize_mpeg1;
+    if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
+        s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
+        s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
+    }else if(s->out_format == FMT_H263){
+        s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
+        s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
+    }else{
+        s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
+        s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
+    }
  
      if(s->dct_error_sum){
          assert(s->avctx->noise_reduction && s->encoding);
@@ -1339,13 +1425,22 @@ static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int
  /**
   * prints debuging info for the given picture.
   */
-void ff_print_debug_info(MpegEncContext *s, Picture *pict){
+void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
  
      if(!pict || !pict->mb_type) return;
  
      if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
          int x,y;
-
+        
+        av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
+        switch (pict->pict_type) {
+            case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
+            case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
+            case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
+            case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
+            case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
+            case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;            
+        }
          for(y=0; y<s->mb_height; y++){
              for(x=0; x<s->mb_width; x++){
                  if(s->avctx->debug&FF_DEBUG_SKIP){
@@ -1358,7 +1453,6 @@ void ff_print_debug_info(MpegEncContext *s, Picture *pict){
                  }
                  if(s->avctx->debug&FF_DEBUG_MB_TYPE){
                      int mb_type= pict->mb_type[x + y*s->mb_stride];
-                    
                      //Type & MV direction
                      if(IS_PCM(mb_type))
                          av_log(s->avctx, AV_LOG_DEBUG, "P");
@@ -1411,43 +1505,140 @@ void ff_print_debug_info(MpegEncContext *s, Picture *pict){
          }
      }
  
-    if((s->avctx->debug&FF_DEBUG_VIS_MV) && pict->motion_val){
+    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
          const int shift= 1 + s->quarter_sample;
          int mb_y;
-        uint8_t *ptr= pict->data[0];
+        uint8_t *ptr;
+        int i;
+        int h_chroma_shift, v_chroma_shift;
          s->low_delay=0; //needed to see the vectors without trashing the buffers
  
+        avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
+        for(i=0; i<3; i++){
+            memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*s->height:pict->linesize[i]*s->height >> v_chroma_shift);
+            pict->data[i]= s->visualization_buffer[i];
+        }
+        pict->type= FF_BUFFER_TYPE_COPY;
+        ptr= pict->data[0];
+
          for(mb_y=0; mb_y<s->mb_height; mb_y++){
              int mb_x;
              for(mb_x=0; mb_x<s->mb_width; mb_x++){
                  const int mb_index= mb_x + mb_y*s->mb_stride;
-                if(IS_8X8(pict->mb_type[mb_index])){
-                    int i;
-                    for(i=0; i<4; i++){
+                if((s->avctx->debug_mv) && pict->motion_val){
+                  int type;
+                  for(type=0; type<3; type++){
+                    int direction;
+                    switch (type) {
+                      case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
+                                continue;
+                              direction = 0;
+                              break;
+                      case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
+                                continue;
+                              direction = 0;
+                              break;
+                      case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
+                                continue;
+                              direction = 1;
+                              break;
+                    }
+                    if(!USES_LIST(pict->mb_type[mb_index], direction))
+                        continue;
+
+                    if(IS_8X8(pict->mb_type[mb_index])){
+                      int i;
+                      for(i=0; i<4; i++){
                          int sx= mb_x*16 + 4 + 8*(i&1);
                          int sy= mb_y*16 + 4 + 8*(i>>1);
                          int xy= 1 + mb_x*2 + (i&1) + (mb_y*2 + 1 + (i>>1))*(s->mb_width*2 + 2);
-                        int mx= (pict->motion_val[0][xy][0]>>shift) + sx;
-                        int my= (pict->motion_val[0][xy][1]>>shift) + sy;
+                        int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
+                        int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                          draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
-                    }
-                }else if(IS_16X8(pict->mb_type[mb_index])){
-                    int i;
-                    for(i=0; i<2; i++){
+                      }
+                    }else if(IS_16X8(pict->mb_type[mb_index])){
+                      int i;
+                      for(i=0; i<2; i++){
                          int sx=mb_x*16 + 8;
                          int sy=mb_y*16 + 4 + 8*i;
                          int xy=1 + mb_x*2 + (mb_y*2 + 1 + i)*(s->mb_width*2 + 2);
-                        int mx=(pict->motion_val[0][xy][0]>>shift) + sx;
-                        int my=(pict->motion_val[0][xy][1]>>shift) + sy;
+                        int mx=(pict->motion_val[direction][xy][0]>>shift) + sx;
+                        int my=(pict->motion_val[direction][xy][1]>>shift) + sy;
                          draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
+                      }
+                    }else{
+                      int sx= mb_x*16 + 8;
+                      int sy= mb_y*16 + 8;
+                      int xy= 1 + mb_x*2 + (mb_y*2 + 1)*(s->mb_width*2 + 2);
+                      int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
+                      int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
+                      draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
+                    }
+                  }                  
+                }
+                if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
+                    uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
+                    int y;
+                    for(y=0; y<8; y++){
+                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
+                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
+                    }
+                }
+                if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
+                    int mb_type= pict->mb_type[mb_index];
+                    uint64_t u,v;
+                    int y;
+#define COLOR(theta, r)\
+u= (int)(128 + r*cos(theta*3.141592/180));\
+v= (int)(128 + r*sin(theta*3.141592/180));
+
+                    
+                    u=v=128;
+                    if(IS_PCM(mb_type)){
+                        COLOR(120,48)
+                    }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
+                        COLOR(30,48)
+                    }else if(IS_INTRA4x4(mb_type)){
+                        COLOR(90,48)
+                    }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
+//                        COLOR(120,48)
+                    }else if(IS_DIRECT(mb_type)){
+                        COLOR(150,48)
+                    }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
+                        COLOR(170,48)
+                    }else if(IS_GMC(mb_type)){
+                        COLOR(190,48)
+                    }else if(IS_SKIP(mb_type)){
+//                        COLOR(180,48)
+                    }else if(!USES_LIST(mb_type, 1)){
+                        COLOR(240,48)
+                    }else if(!USES_LIST(mb_type, 0)){
+                        COLOR(0,48)
+                    }else{
+                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
+                        COLOR(300,48)
+                    }
+
+                    u*= 0x0101010101010101ULL;
+                    v*= 0x0101010101010101ULL;
+                    for(y=0; y<8; y++){
+                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
+                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
+                    }
+
+                    //segmentation
+                    if(IS_8X8(mb_type) || IS_16X8(mb_type)){
+                        *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
+                        *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
+                    }
+                    if(IS_8X8(mb_type) || IS_8X16(mb_type)){
+                        for(y=0; y<16; y++)
+                            pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
+                    }
+                        
+                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
+                        // hmm
                      }
-                }else{
-                    int sx= mb_x*16 + 8;
-                    int sy= mb_y*16 + 8;
-                    int xy= 1 + mb_x*2 + (mb_y*2 + 1)*(s->mb_width*2 + 2);
-                    int mx= (pict->motion_val[0][xy][0]>>shift) + sx;
-                    int my= (pict->motion_val[0][xy][1]>>shift) + sy;
-                    draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
                  }
                  s->mbskip_table[mb_index]=0;
              }
@@ -1480,7 +1671,7 @@ static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int st
      for(y=0; y<h; y+=16){
          for(x=0; x<w; x+=16){
              int offset= x + y*stride;
-            int sad = s->dsp.pix_abs16x16(src + offset, ref + offset, stride);
+            int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
              int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
              int sae = get_sae(src + offset, mean, stride);
              
@@ -1555,13 +1746,9 @@ static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
              }
          }
      }
-    pic->quality= pic_arg->quality;
-    pic->pict_type= pic_arg->pict_type;
-    pic->pts = pic_arg->pts;
-    
-    if(s->input_picture[encoding_delay])
-        pic->display_picture_number= s->input_picture[encoding_delay]->display_picture_number + 1;
+    copy_picture_attributes(pic, pic_arg);
      
+    pic->display_picture_number= s->input_picture_number++;
    }
  
      /* shift buffer entries */
@@ -1575,10 +1762,6 @@ static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
  
  static void select_input_picture(MpegEncContext *s){
      int i;
-    int coded_pic_num=0;    
-
-    if(s->reordered_input_picture[0])
-        coded_pic_num= s->reordered_input_picture[0]->coded_picture_number + 1;
  
      for(i=1; i<MAX_PICTURE_COUNT; i++)
          s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
@@ -1589,7 +1772,7 @@ static void select_input_picture(MpegEncContext *s){
          if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
              s->reordered_input_picture[0]= s->input_picture[0];
              s->reordered_input_picture[0]->pict_type= I_TYPE;
-            s->reordered_input_picture[0]->coded_picture_number= coded_pic_num;
+            s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
          }else{
              int b_frames;
              
@@ -1613,10 +1796,10 @@ static void select_input_picture(MpegEncContext *s){
                      av_log(s->avctx, AV_LOG_ERROR, "warning, too many bframes in a row\n");
                      b_frames = s->max_b_frames;
                  }
-            }else if(s->b_frame_strategy==0){
+            }else if(s->avctx->b_frame_strategy==0){
                  b_frames= s->max_b_frames;
                  while(b_frames && !s->input_picture[b_frames]) b_frames--;
-            }else if(s->b_frame_strategy==1){
+            }else if(s->avctx->b_frame_strategy==1){
                  for(i=1; i<s->max_b_frames+1; i++){
                      if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
                          s->input_picture[i]->b_frame_score= 
@@ -1643,19 +1826,25 @@ static void select_input_picture(MpegEncContext *s){
  //static int b_count=0;
  //b_count+= b_frames;
  //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
-                        
+            if(s->picture_in_gop_number + b_frames >= s->gop_size){
+                if(s->flags & CODEC_FLAG_CLOSED_GOP)
+                    b_frames=0;
+                s->input_picture[b_frames]->pict_type= I_TYPE;
+            }
+            
+            if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
+               && b_frames
+               && s->input_picture[b_frames]->pict_type== I_TYPE)
+                b_frames--;
+
              s->reordered_input_picture[0]= s->input_picture[b_frames];
-            if(   s->picture_in_gop_number + b_frames >= s->gop_size 
-               || s->reordered_input_picture[0]->pict_type== I_TYPE)
-                s->reordered_input_picture[0]->pict_type= I_TYPE;
-            else
+            if(s->reordered_input_picture[0]->pict_type != I_TYPE)
                  s->reordered_input_picture[0]->pict_type= P_TYPE;
-            s->reordered_input_picture[0]->coded_picture_number= coded_pic_num;
+            s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
              for(i=0; i<b_frames; i++){
-                coded_pic_num++;
                  s->reordered_input_picture[i+1]= s->input_picture[i];
                  s->reordered_input_picture[i+1]->pict_type= B_TYPE;
-                s->reordered_input_picture[i+1]->coded_picture_number= coded_pic_num;
+                s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
              }
          }
      }
@@ -1676,12 +1865,8 @@ static void select_input_picture(MpegEncContext *s){
                  s->reordered_input_picture[0]->data[i]= NULL;
              s->reordered_input_picture[0]->type= 0;
              
-            //FIXME bad, copy * except
-            pic->pict_type = s->reordered_input_picture[0]->pict_type;
-            pic->quality   = s->reordered_input_picture[0]->quality;
-            pic->coded_picture_number = s->reordered_input_picture[0]->coded_picture_number;
-            pic->reference = s->reordered_input_picture[0]->reference;
-           pic->pts = s->reordered_input_picture[0]->pts;
+            copy_picture_attributes((AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
+            pic->reference              = s->reordered_input_picture[0]->reference;
              
              alloc_picture(s, pic, 0);
  
@@ -1711,7 +1896,7 @@ int MPV_encode_picture(AVCodecContext *avctx,
  {
      MpegEncContext *s = avctx->priv_data;
      AVFrame *pic_arg = data;
-    int i;
+    int i, stuffing_count;
  
      if(avctx->pix_fmt != PIX_FMT_YUV420P){
          av_log(avctx, AV_LOG_ERROR, "this codec supports only YUV420P\n");
@@ -1728,7 +1913,6 @@ int MPV_encode_picture(AVCodecContext *avctx,
      
      /* output? */
      if(s->new_picture.data[0]){
-
          s->pict_type= s->new_picture.pict_type;
  //emms_c();
  //printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
@@ -1757,17 +1941,58 @@ int MPV_encode_picture(AVCodecContext *avctx,
          for(i=0; i<4; i++){
              avctx->error[i] += s->current_picture_ptr->error[i];
          }
-    }
  
-    s->input_picture_number++;
+        flush_put_bits(&s->pb);
+        s->frame_bits  = (pbBufPtr(&s->pb) - s->pb.buf) * 8;
+
+        stuffing_count= ff_vbv_update(s, s->frame_bits);
+        if(stuffing_count){
+            switch(s->codec_id){
+            case CODEC_ID_MPEG1VIDEO:
+            case CODEC_ID_MPEG2VIDEO:
+                while(stuffing_count--){
+                    put_bits(&s->pb, 8, 0);
+                }
+            break;
+            case CODEC_ID_MPEG4:
+                put_bits(&s->pb, 16, 0);
+                put_bits(&s->pb, 16, 0x1C3);
+                stuffing_count -= 4;
+                while(stuffing_count--){
+                    put_bits(&s->pb, 8, 0xFF);
+                }
+            break;
+            default:
+                av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
+            }
+            flush_put_bits(&s->pb);
+            s->frame_bits  = (pbBufPtr(&s->pb) - s->pb.buf) * 8;
+        }
+
+        /* update mpeg1/2 vbv_delay for CBR */    
+        if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate){
+            int vbv_delay;
  
-    flush_put_bits(&s->pb);
-    s->frame_bits  = (pbBufPtr(&s->pb) - s->pb.buf) * 8;
-    
-    s->total_bits += s->frame_bits;
-    avctx->frame_bits  = s->frame_bits;
+            assert(s->repeat_first_field==0);
+            
+            vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
+            assert(vbv_delay < 0xFFFF);
+
+            s->vbv_delay_ptr[0] &= 0xF8;
+            s->vbv_delay_ptr[0] |= vbv_delay>>13;
+            s->vbv_delay_ptr[1]  = vbv_delay>>5;
+            s->vbv_delay_ptr[2] &= 0x07;
+            s->vbv_delay_ptr[2] |= vbv_delay<<3;
+        }
+        s->total_bits += s->frame_bits;
+        avctx->frame_bits  = s->frame_bits;
+    }else{
+        assert((pbBufPtr(&s->pb) == s->pb.buf));
+        s->frame_bits=0;
+    }
+    assert((s->frame_bits&7)==0);
      
-    return pbBufPtr(&s->pb) - s->pb.buf;
+    return s->frame_bits/8;
  }
  
  #endif //CONFIG_ENCODERS
@@ -2657,7 +2882,7 @@ static inline void MPV_motion(MpegEncContext *s,
  static inline void put_dct(MpegEncContext *s, 
                             DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
  {
-    s->dct_unquantize(s, block, i, qscale);
+    s->dct_unquantize_intra(s, block, i, qscale);
      s->dsp.idct_put (dest, line_size, block);
  }
  
@@ -2674,7 +2899,7 @@ static inline void add_dequant_dct(MpegEncContext *s,
                             DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
  {
      if (s->block_last_index[i] >= 0) {
-        s->dct_unquantize(s, block, i, qscale);
+        s->dct_unquantize_inter(s, block, i, qscale);
  
          s->dsp.idct_add (dest, line_size, block);
      }
@@ -2946,6 +3171,7 @@ static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index
      int i;
      const int maxlevel= s->max_qcoeff;
      const int minlevel= s->min_qcoeff;
+    int overflow=0;
      
      if(s->mb_intra){
          i=1; //skip clipping of intra dc
@@ -2956,78 +3182,21 @@ static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index
          const int j= s->intra_scantable.permutated[i];
          int level = block[j];
         
-        if     (level>maxlevel) level=maxlevel;
-        else if(level<minlevel) level=minlevel;
-
-        block[j]= level;
-    }
-}
-
-#if 0
-static int pix_vcmp16x8(uint8_t *s, int stride){ //FIXME move to dsputil & optimize
-    int score=0;
-    int x,y;
-    
-    for(y=0; y<7; y++){
-        for(x=0; x<16; x+=4){
-            score+= ABS(s[x  ] - s[x  +stride]) + ABS(s[x+1] - s[x+1+stride]) 
-                   +ABS(s[x+2] - s[x+2+stride]) + ABS(s[x+3] - s[x+3+stride]);
-        }
-        s+= stride;
-    }
-    
-    return score;
-}
-
-static int pix_diff_vcmp16x8(uint8_t *s1, uint8_t*s2, int stride){ //FIXME move to dsputil & optimize
-    int score=0;
-    int x,y;
-    
-    for(y=0; y<7; y++){
-        for(x=0; x<16; x++){
-            score+= ABS(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
-        }
-        s1+= stride;
-        s2+= stride;
-    }
-    
-    return score;
-}
-#else
-#define SQ(a) ((a)*(a))
-
-static int pix_vcmp16x8(uint8_t *s, int stride){ //FIXME move to dsputil & optimize
-    int score=0;
-    int x,y;
-    
-    for(y=0; y<7; y++){
-        for(x=0; x<16; x+=4){
-            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride]) 
-                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);
-        }
-        s+= stride;
-    }
-    
-    return score;
-}
-
-static int pix_diff_vcmp16x8(uint8_t *s1, uint8_t*s2, int stride){ //FIXME move to dsputil & optimize
-    int score=0;
-    int x,y;
-    
-    for(y=0; y<7; y++){
-        for(x=0; x<16; x++){
-            score+= SQ(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
+        if     (level>maxlevel){
+            level=maxlevel;
+            overflow++;
+        }else if(level<minlevel){
+            level=minlevel;
+            overflow++;
          }
-        s1+= stride;
-        s2+= stride;
+        
+        block[j]= level;
      }
      
-    return score;
+    if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
+        av_log(s->avctx, AV_LOG_INFO, "warning, cliping %d dct coefficents to %d..%d\n", overflow, minlevel, maxlevel);
  }
  
-#endif
-
  #endif //CONFIG_ENCODERS
  
  /**
@@ -3097,8 +3266,33 @@ void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
  
  #ifdef CONFIG_ENCODERS
  
+static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
+    int x, y;
+//FIXME optimize
+    for(y=0; y<8; y++){
+        for(x=0; x<8; x++){
+            int x2, y2;
+            int sum=0;
+            int sqr=0;
+            int count=0;
+
+            for(y2= FFMAX(y-1, 0); y2 < FFMIN(8, y+2); y2++){
+                for(x2= FFMAX(x-1, 0); x2 < FFMIN(8, x+2); x2++){
+                    int v= ptr[x2 + y2*stride];
+                    sum += v;
+                    sqr += v*v;
+                    count++;
+                }
+            }
+            weight[x + 8*y]= (36*ff_sqrt(count*sqr - sum*sum)) / count;
+        }
+    }
+}
+
  static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
  {
+    int16_t weight[6][64];
+    DCTELEM orig[6][64];
      const int mb_x= s->mb_x;
      const int mb_y= s->mb_y;
      int i;
@@ -3131,57 +3325,71 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
      }
  
      if (s->mb_intra) {
-        uint8_t *ptr;
-        int wrap_y;
+        uint8_t *ptr_y, *ptr_cb, *ptr_cr;
+        int wrap_y, wrap_c;
          int emu=0;
  
          wrap_y = s->linesize;
-        ptr = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
+        wrap_c = s->uvlinesize;
+        ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
+        ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
+        ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
  
          if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
-            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
-            ptr= s->edge_emu_buffer;
+            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
+            ptr_y= s->edge_emu_buffer;
              emu=1;
          }
          
          if(s->flags&CODEC_FLAG_INTERLACED_DCT){
              int progressive_score, interlaced_score;
+
+            s->interlaced_dct=0;
+            progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8) 
+                              +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
+
+            if(progressive_score > 0){
+                interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8) 
+                                  +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
+                if(progressive_score > interlaced_score){
+                    s->interlaced_dct=1;
              
-            progressive_score= pix_vcmp16x8(ptr, wrap_y  ) + pix_vcmp16x8(ptr + wrap_y*8, wrap_y );
-            interlaced_score = pix_vcmp16x8(ptr, wrap_y*2) + pix_vcmp16x8(ptr + wrap_y  , wrap_y*2);
-            
-            if(progressive_score > interlaced_score + 100){
-                s->interlaced_dct=1;
-            
-                dct_offset= wrap_y;
-                wrap_y<<=1;
-            }else
-                s->interlaced_dct=0;
+                    dct_offset= wrap_y;
+                    wrap_y<<=1;
+                }
+            }
          }
          
-       s->dsp.get_pixels(s->block[0], ptr                 , wrap_y);
-        s->dsp.get_pixels(s->block[1], ptr              + 8, wrap_y);
-        s->dsp.get_pixels(s->block[2], ptr + dct_offset    , wrap_y);
-        s->dsp.get_pixels(s->block[3], ptr + dct_offset + 8, wrap_y);
+       s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
+        s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
+        s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
+        s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
  
          if(s->flags&CODEC_FLAG_GRAY){
              skip_dct[4]= 1;
              skip_dct[5]= 1;
          }else{
-            int wrap_c = s->uvlinesize;
-            ptr = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
              if(emu){
-                ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
-                ptr= s->edge_emu_buffer;
+                ff_emulated_edge_mc(s->edge_emu_buffer, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
+                ptr_cb= s->edge_emu_buffer;
              }
-           s->dsp.get_pixels(s->block[4], ptr, wrap_c);
+           s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
  
-            ptr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
              if(emu){
-                ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
-                ptr= s->edge_emu_buffer;
+                ff_emulated_edge_mc(s->edge_emu_buffer, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
+                ptr_cr= s->edge_emu_buffer;
              }
-            s->dsp.get_pixels(s->block[5], ptr, wrap_c);
+            s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
+        }
+
+        if(s->avctx->quantizer_noise_shaping){
+            get_vissual_weight(weight[0], ptr_y                 , wrap_y);
+            get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
+            get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
+            get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
+            get_vissual_weight(weight[4], ptr_cb                , wrap_c);
+            get_vissual_weight(weight[5], ptr_cr               , wrap_c);
+            memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*6);
          }
      }else{
          op_pixels_func (*op_pix)[4];
@@ -3225,19 +3433,24 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
          
          if(s->flags&CODEC_FLAG_INTERLACED_DCT){
              int progressive_score, interlaced_score;
+
+            s->interlaced_dct=0;
+            progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8) 
+                              +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
              
-            progressive_score= pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y  ) 
-                             + pix_diff_vcmp16x8(ptr_y + wrap_y*8, dest_y + wrap_y*8, wrap_y  );
-            interlaced_score = pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y*2)
-                             + pix_diff_vcmp16x8(ptr_y + wrap_y  , dest_y + wrap_y  , wrap_y*2);
+            if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
+
+            if(progressive_score>0){
+                interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8) 
+                                  +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
              
-            if(progressive_score > interlaced_score + 600){
-                s->interlaced_dct=1;
+                if(progressive_score > interlaced_score){
+                    s->interlaced_dct=1;
              
-                dct_offset= wrap_y;
-                wrap_y<<=1;
-            }else
-                s->interlaced_dct=0;
+                    dct_offset= wrap_y;
+                    wrap_y<<=1;
+                }
+            }
          }
          
         s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
@@ -3263,29 +3476,23 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
          /* pre quantization */         
          if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
              //FIXME optimize
-           if(s->dsp.pix_abs8x8(ptr_y               , dest_y               , wrap_y) < 20*s->qscale) skip_dct[0]= 1;
-            if(s->dsp.pix_abs8x8(ptr_y            + 8, dest_y            + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1;
-            if(s->dsp.pix_abs8x8(ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y) < 20*s->qscale) skip_dct[2]= 1;
-            if(s->dsp.pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1;
-            if(s->dsp.pix_abs8x8(ptr_cb              , dest_cb              , wrap_c) < 20*s->qscale) skip_dct[4]= 1;
-            if(s->dsp.pix_abs8x8(ptr_cr              , dest_cr              , wrap_c) < 20*s->qscale) skip_dct[5]= 1;
-#if 0
-{
- static int stat[7];
- int num=0;
- for(i=0; i<6; i++)
-  if(skip_dct[i]) num++;
- stat[num]++;
- 
- if(s->mb_x==0 && s->mb_y==0){
-  for(i=0; i<7; i++){
-   printf("%6d %1d\n", stat[i], i);
-  }
- }
-}
-#endif
+           if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
+            if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
+            if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
+            if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
+            if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
+            if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
          }
  
+        if(s->avctx->quantizer_noise_shaping){
+            if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
+            if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
+            if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
+            if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
+            if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
+            if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
+            memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*6);
+        }
      }
              
      /* DCT & quantize */
@@ -3307,6 +3514,13 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
              }else
                  s->block_last_index[i]= -1;
          }
+        if(s->avctx->quantizer_noise_shaping){
+            for(i=0;i<6;i++) {
+                if(!skip_dct[i]){
+                    s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
+                }
+            }
+        }
          
          if(s->luma_elim_threshold && !s->mb_intra)
              for(i=0; i<4; i++)
@@ -3330,6 +3544,19 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
          s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
      }
  
+    //non c quantize code returns incorrect block_last_index FIXME
+    if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
+        for(i=0; i<6; i++){
+            int j;
+            if(s->block_last_index[i]>0){
+                for(j=63; j>0; j--){
+                    if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
+                }
+                s->block_last_index[i]= j;
+            }
+        }
+    }
+
      /* huffman encode */
      switch(s->codec_id){ //FIXME funct ptr could be slightly faster
      case CODEC_ID_MPEG1VIDEO:
@@ -3421,6 +3648,9 @@ void ff_mpeg_flush(AVCodecContext *avctx){
      int i;
      MpegEncContext *s = avctx->priv_data;
      
+    if(s==NULL || s->picture==NULL) 
+        return;
+    
      for(i=0; i<MAX_PICTURE_COUNT; i++){
         if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
                                      || s->picture[i].type == FF_BUFFER_TYPE_USER))
@@ -3570,9 +3800,9 @@ static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, in
      int x,y;
      
      if(w==16 && h==16) 
-        return s->dsp.sse[0](NULL, src1, src2, stride);
+        return s->dsp.sse[0](NULL, src1, src2, stride, 16);
      else if(w==8 && h==8)
-        return s->dsp.sse[1](NULL, src1, src2, stride);
+        return s->dsp.sse[1](NULL, src1, src2, stride, 8);
      
      for(y=0; y<h; y++){
          for(x=0; x<w; x++){
@@ -3593,9 +3823,9 @@ static int sse_mb(MpegEncContext *s){
      if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
  
      if(w==16 && h==16)
-        return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize)
-               +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize)
-               +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize);
+        return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
+               +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
+               +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
      else
          return  sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
                 +sse(s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
@@ -3605,7 +3835,7 @@ static int sse_mb(MpegEncContext *s){
  static void encode_picture(MpegEncContext *s, int picture_number)
  {
      int mb_x, mb_y, pdif = 0;
-    int i;
+    int i, j;
      int bits;
      MpegEncContext best_s, backup_s;
      uint8_t bit_buf[2][3000];
@@ -3653,9 +3883,9 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                  s->me.dia_size= s->avctx->pre_dia_size;
  
                  for(mb_y=s->mb_height-1; mb_y >=0 ; mb_y--) {
+                    s->mb_y = mb_y;
                      for(mb_x=s->mb_width-1; mb_x >=0 ; mb_x--) {
                          s->mb_x = mb_x;
-                        s->mb_y = mb_y;
                          ff_pre_estimate_p_frame_motion(s, mb_x, mb_y);
                      }
                  }
@@ -3665,13 +3895,13 @@ static void encode_picture(MpegEncContext *s, int picture_number)
  
          s->me.dia_size= s->avctx->dia_size;
          for(mb_y=0; mb_y < s->mb_height; mb_y++) {
+            s->mb_y = mb_y;
              s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1;
              s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1);
              s->block_index[2]= s->block_wrap[0]*(mb_y*2 + 2) - 1;
              s->block_index[3]= s->block_wrap[0]*(mb_y*2 + 2);
              for(mb_x=0; mb_x < s->mb_width; mb_x++) {
                  s->mb_x = mb_x;
-                s->mb_y = mb_y;
                  s->block_index[0]+=2;
                  s->block_index[1]+=2;
                  s->block_index[2]+=2;
@@ -3686,10 +3916,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
          }
      }else /* if(s->pict_type == I_TYPE) */{
          /* I-Frame */
-        //FIXME do we need to zero them?
-        memset(s->current_picture.motion_val[0][0], 0, sizeof(int16_t)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2);
-        memset(s->p_mv_table   , 0, sizeof(int16_t)*(s->mb_stride)*s->mb_height*2);
-        memset(s->mb_type      , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height);
+        for(i=0; i<s->mb_stride*s->mb_height; i++)
+            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
          
          if(!s->fixed_qscale){
              /* finding spatial complexity for I-frame rate control */
@@ -3714,32 +3942,61 @@ static void encode_picture(MpegEncContext *s, int picture_number)
  
      if(s->scene_change_score > s->avctx->scenechange_threshold && s->pict_type == P_TYPE){
          s->pict_type= I_TYPE;
-        memset(s->mb_type   , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height);
+        for(i=0; i<s->mb_stride*s->mb_height; i++)
+            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
  //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
      }
  
      if(!s->umvplus){
          if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
-            s->f_code= ff_get_best_fcode(s, s->p_mv_table, MB_TYPE_INTER);
-        
+            s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
+
+            if(s->flags & CODEC_FLAG_INTERLACED_ME){
+                int a,b;
+                a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
+                b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
+                s->f_code= FFMAX(s->f_code, FFMAX(a,b));
+            }
+                    
              ff_fix_long_p_mvs(s);
+            ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
+            if(s->flags & CODEC_FLAG_INTERLACED_ME){
+                for(i=0; i<2; i++){
+                    for(j=0; j<2; j++)
+                        ff_fix_long_mvs(s, s->p_field_select_table[i], j, 
+                                        s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
+                }
+            }
          }
  
          if(s->pict_type==B_TYPE){
              int a, b;
  
-            a = ff_get_best_fcode(s, s->b_forw_mv_table, MB_TYPE_FORWARD);
-            b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, MB_TYPE_BIDIR);
+            a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
+            b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
              s->f_code = FFMAX(a, b);
  
-            a = ff_get_best_fcode(s, s->b_back_mv_table, MB_TYPE_BACKWARD);
-            b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, MB_TYPE_BIDIR);
+            a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
+            b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
              s->b_code = FFMAX(a, b);
  
-            ff_fix_long_b_mvs(s, s->b_forw_mv_table, s->f_code, MB_TYPE_FORWARD);
-            ff_fix_long_b_mvs(s, s->b_back_mv_table, s->b_code, MB_TYPE_BACKWARD);
-            ff_fix_long_b_mvs(s, s->b_bidir_forw_mv_table, s->f_code, MB_TYPE_BIDIR);
-            ff_fix_long_b_mvs(s, s->b_bidir_back_mv_table, s->b_code, MB_TYPE_BIDIR);
+            ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
+            ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
+            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
+            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
+            if(s->flags & CODEC_FLAG_INTERLACED_ME){
+                int dir;
+                for(dir=0; dir<2; dir++){
+                    for(i=0; i<2; i++){
+                        for(j=0; j<2; j++){
+                            int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I) 
+                                          : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
+                            ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j, 
+                                            s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
+                        }
+                    }
+                }
+            }
          }
      }
      
@@ -3815,6 +4072,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
          break;
      case FMT_H264:
          break;
+    default:
+        assert(0);
      }
      bits= get_bit_count(&s->pb);
      s->header_bits= bits - s->last_bits;
@@ -3836,10 +4095,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
          s->current_picture_ptr->error[i] = 0;
      }
      s->mb_skip_run = 0;
-    s->last_mv[0][0][0] = 0;
-    s->last_mv[0][0][1] = 0;
-    s->last_mv[1][0][0] = 0;
-    s->last_mv[1][0][1] = 0;
+    memset(s->last_mv, 0, sizeof(s->last_mv));
       
      s->last_mv_dir = 0;
  
@@ -3873,6 +4129,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
              int mb_type= s->mb_type[xy];
  //            int d;
              int dmin= INT_MAX;
+            int dir;
  
              s->mb_x = mb_x;
              ff_update_block_index(s);
@@ -3913,6 +4170,18 @@ static void encode_picture(MpegEncContext *s, int picture_number)
  
                      assert((get_bit_count(&s->pb)&7) == 0);
                      current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
+                    
+                    if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
+                        int r= get_bit_count(&s->pb)/8 + s->picture_number + s->codec_id + s->mb_x + s->mb_y;
+                        int d= 100 / s->avctx->error_rate;
+                        if(r % d == 0){
+                            current_packet_size=0;
+#ifndef ALT_BITSTREAM_WRITER
+                            s->pb.buf_ptr= s->ptr_lastgob;
+#endif
+                            assert(pbBufPtr(&s->pb) == s->ptr_lastgob);
+                        }
+                    }
          
                      if (s->avctx->rtp_callback)
                          s->avctx->rtp_callback(s->ptr_lastgob, current_packet_size, 0);
@@ -3968,25 +4237,37 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                      backup_s.tex_pb= s->tex_pb;
                  }
  
-                if(mb_type&MB_TYPE_INTER){
+                if(mb_type&CANDIDATE_MB_TYPE_INTER){
                      s->mv_dir = MV_DIR_FORWARD;
                      s->mv_type = MV_TYPE_16X16;
                      s->mb_intra= 0;
                      s->mv[0][0][0] = s->p_mv_table[xy][0];
                      s->mv[0][0][1] = s->p_mv_table[xy][1];
-                    encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER, pb, pb2, tex_pb, 
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb, 
                                   &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
                  }
-                if(mb_type&MB_TYPE_SKIPED){
+                if(mb_type&CANDIDATE_MB_TYPE_INTER_I){ 
+                    s->mv_dir = MV_DIR_FORWARD;
+                    s->mv_type = MV_TYPE_FIELD;
+                    s->mb_intra= 0;
+                    for(i=0; i<2; i++){
+                        j= s->field_select[0][i] = s->p_field_select_table[i][xy];
+                        s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
+                        s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
+                    }
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb, 
+                                 &dmin, &next_block, 0, 0);
+                }
+                if(mb_type&CANDIDATE_MB_TYPE_SKIPED){
                      s->mv_dir = MV_DIR_FORWARD;
                      s->mv_type = MV_TYPE_16X16;
                      s->mb_intra= 0;
                      s->mv[0][0][0] = 0;
                      s->mv[0][0][1] = 0;
-                    encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_SKIPED, pb, pb2, tex_pb, 
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPED, pb, pb2, tex_pb, 
                                   &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
                  }
-                if(mb_type&MB_TYPE_INTER4V){                 
+                if(mb_type&CANDIDATE_MB_TYPE_INTER4V){                 
                      s->mv_dir = MV_DIR_FORWARD;
                      s->mv_type = MV_TYPE_8X8;
                      s->mb_intra= 0;
@@ -3994,28 +4275,28 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                          s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
                          s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
                      }
-                    encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER4V, pb, pb2, tex_pb, 
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb, 
                                   &dmin, &next_block, 0, 0);
                  }
-                if(mb_type&MB_TYPE_FORWARD){
+                if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
                      s->mv_dir = MV_DIR_FORWARD;
                      s->mv_type = MV_TYPE_16X16;
                      s->mb_intra= 0;
                      s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
                      s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
-                    encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_FORWARD, pb, pb2, tex_pb, 
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb, 
                                   &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
                  }
-                if(mb_type&MB_TYPE_BACKWARD){
+                if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
                      s->mv_dir = MV_DIR_BACKWARD;
                      s->mv_type = MV_TYPE_16X16;
                      s->mb_intra= 0;
                      s->mv[1][0][0] = s->b_back_mv_table[xy][0];
                      s->mv[1][0][1] = s->b_back_mv_table[xy][1];
-                    encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BACKWARD, pb, pb2, tex_pb, 
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb, 
                                   &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
                  }
-                if(mb_type&MB_TYPE_BIDIR){
+                if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
                      s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
                      s->mv_type = MV_TYPE_16X16;
                      s->mb_intra= 0;
@@ -4023,10 +4304,10 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                      s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
                      s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
                      s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
-                    encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BIDIR, pb, pb2, tex_pb, 
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb, 
                                   &dmin, &next_block, 0, 0);
                  }
-                if(mb_type&MB_TYPE_DIRECT){
+                if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
                      int mx= s->b_direct_mv_table[xy][0];
                      int my= s->b_direct_mv_table[xy][1];
                      
@@ -4035,16 +4316,54 @@ static void encode_picture(MpegEncContext *s, int picture_number)
  #ifdef CONFIG_RISKY
                      ff_mpeg4_set_direct_mv(s, mx, my);
  #endif
-                    encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_DIRECT, pb, pb2, tex_pb, 
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb, 
                                   &dmin, &next_block, mx, my);
                  }
-                if(mb_type&MB_TYPE_INTRA){
+                if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){ 
+                    s->mv_dir = MV_DIR_FORWARD;
+                    s->mv_type = MV_TYPE_FIELD;
+                    s->mb_intra= 0;
+                    for(i=0; i<2; i++){
+                        j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
+                        s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
+                        s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
+                    }
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb, 
+                                 &dmin, &next_block, 0, 0);
+                }
+                if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){ 
+                    s->mv_dir = MV_DIR_BACKWARD;
+                    s->mv_type = MV_TYPE_FIELD;
+                    s->mb_intra= 0;
+                    for(i=0; i<2; i++){
+                        j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
+                        s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
+                        s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
+                    }
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb, 
+                                 &dmin, &next_block, 0, 0);
+                }
+                if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){ 
+                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
+                    s->mv_type = MV_TYPE_FIELD;
+                    s->mb_intra= 0;
+                    for(dir=0; dir<2; dir++){
+                        for(i=0; i<2; i++){
+                            j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
+                            s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
+                            s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
+                        }
+                    }
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb, 
+                                 &dmin, &next_block, 0, 0);
+                }
+                if(mb_type&CANDIDATE_MB_TYPE_INTRA){
                      s->mv_dir = 0;
                      s->mv_type = MV_TYPE_16X16;
                      s->mb_intra= 1;
                      s->mv[0][0][0] = 0;
                      s->mv[0][0][1] = 0;
-                    encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTRA, pb, pb2, tex_pb, 
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb, 
                                   &dmin, &next_block, 0, 0);
                      if(s->h263_pred || s->h263_aic){
                          if(best_s.mb_intra)
@@ -4086,7 +4405,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                                  }
                              }
  
-                            encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb, 
+                            encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb, 
                                           &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
                              if(best_s.qscale != qp){
                                  if(s->mb_intra){
@@ -4146,19 +4465,30 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                  // only one MB-Type possible
                  
                  switch(mb_type){
-                case MB_TYPE_INTRA:
+                case CANDIDATE_MB_TYPE_INTRA:
                      s->mv_dir = 0;
                      s->mb_intra= 1;
                      motion_x= s->mv[0][0][0] = 0;
                      motion_y= s->mv[0][0][1] = 0;
                      break;
-                case MB_TYPE_INTER:
+                case CANDIDATE_MB_TYPE_INTER:
                      s->mv_dir = MV_DIR_FORWARD;
                      s->mb_intra= 0;
                      motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
                      motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
                      break;
-                case MB_TYPE_INTER4V:
+                case CANDIDATE_MB_TYPE_INTER_I:
+                    s->mv_dir = MV_DIR_FORWARD;
+                    s->mv_type = MV_TYPE_FIELD;
+                    s->mb_intra= 0;
+                    for(i=0; i<2; i++){
+                        j= s->field_select[0][i] = s->p_field_select_table[i][xy];
+                        s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
+                        s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
+                    }
+                    motion_x = motion_y = 0;
+                    break;
+                case CANDIDATE_MB_TYPE_INTER4V:
                      s->mv_dir = MV_DIR_FORWARD;
                      s->mv_type = MV_TYPE_8X8;
                      s->mb_intra= 0;
@@ -4168,7 +4498,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                      }
                      motion_x= motion_y= 0;
                      break;
-                case MB_TYPE_DIRECT:
+                case CANDIDATE_MB_TYPE_DIRECT:
                      s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
                      s->mb_intra= 0;
                      motion_x=s->b_direct_mv_table[xy][0];
@@ -4177,7 +4507,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                      ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
  #endif
                      break;
-                case MB_TYPE_BIDIR:
+                case CANDIDATE_MB_TYPE_BIDIR:
                      s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
                      s->mb_intra= 0;
                      motion_x=0;
@@ -4187,19 +4517,54 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                      s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
                      s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
                      break;
-                case MB_TYPE_BACKWARD:
+                case CANDIDATE_MB_TYPE_BACKWARD:
                      s->mv_dir = MV_DIR_BACKWARD;
                      s->mb_intra= 0;
                      motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
                      motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
                      break;
-                case MB_TYPE_FORWARD:
+                case CANDIDATE_MB_TYPE_FORWARD:
                      s->mv_dir = MV_DIR_FORWARD;
                      s->mb_intra= 0;
                      motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
                      motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
  //                    printf(" %d %d ", motion_x, motion_y);
                      break;
+                case CANDIDATE_MB_TYPE_FORWARD_I:
+                    s->mv_dir = MV_DIR_FORWARD;
+                    s->mv_type = MV_TYPE_FIELD;
+                    s->mb_intra= 0;
+                    for(i=0; i<2; i++){
+                        j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
+                        s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
+                        s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
+                    }
+                    motion_x=motion_y=0;
+                    break;
+                case CANDIDATE_MB_TYPE_BACKWARD_I:
+                    s->mv_dir = MV_DIR_BACKWARD;
+                    s->mv_type = MV_TYPE_FIELD;
+                    s->mb_intra= 0;
+                    for(i=0; i<2; i++){
+                        j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
+                        s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
+                        s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
+                    }
+                    motion_x=motion_y=0;
+                    break;
+                case CANDIDATE_MB_TYPE_BIDIR_I:
+                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
+                    s->mv_type = MV_TYPE_FIELD;
+                    s->mb_intra= 0;
+                    for(dir=0; dir<2; dir++){
+                        for(i=0; i<2; i++){
+                            j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
+                            s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
+                            s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
+                        }
+                    }
+                    motion_x=motion_y=0;
+                    break;
                  default:
                      motion_x=motion_y=0; //gcc warning fix
                      av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
@@ -4268,7 +4633,9 @@ static void encode_picture(MpegEncContext *s, int picture_number)
      }
  }
  
-void ff_denoise_dct(MpegEncContext *s, DCTELEM *block){
+#endif //CONFIG_ENCODERS
+
+static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
      const int intra= s->mb_intra;
      int i;
  
@@ -4292,38 +4659,38 @@ void ff_denoise_dct(MpegEncContext *s, DCTELEM *block){
      }
  }
  
+#ifdef CONFIG_ENCODERS
+
  static int dct_quantize_trellis_c(MpegEncContext *s, 
                          DCTELEM *block, int n,
                          int qscale, int *overflow){
      const int *qmat;
      const uint8_t *scantable= s->intra_scantable.scantable;
+    const uint8_t *perm_scantable= s->intra_scantable.permutated;
      int max=0;
      unsigned int threshold1, threshold2;
      int bias=0;
      int run_tab[65];
      int level_tab[65];
      int score_tab[65];
+    int survivor[65];
+    int survivor_count;
      int last_run=0;
      int last_level=0;
      int last_score= 0;
-    int last_i= 0;
-    int not_coded_score= 0;
-    int coeff[3][64];
+    int last_i;
+    int coeff[2][64];
      int coeff_count[64];
      int qmul, qadd, start_i, last_non_zero, i, dc;
      const int esc_length= s->ac_esc_length;
      uint8_t * length;
      uint8_t * last_length;
-    int score_limit=0;
-    int left_limit= 0;
      const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
-    const int patch_table= s->out_format == FMT_MPEG1 && !s->mb_intra;
          
      s->dsp.fdct (block);
      
      if(s->dct_error_sum)
-        ff_denoise_dct(s, block);
-    
+        s->denoise_dct(s, block);
      qmul= qscale*16;
      qadd= ((qscale-1)|1)*8;
  
@@ -4357,37 +4724,45 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
          length     = s->inter_ac_vlc_length;
          last_length= s->inter_ac_vlc_last_length;
      }
+    last_i= start_i;
  
      threshold1= (1<<QMAT_SHIFT) - bias - 1;
      threshold2= (threshold1<<1);
  
-    for(i=start_i; i<64; i++) {
+    for(i=63; i>=start_i; i--) {
          const int j = scantable[i];
-        const int k= i-start_i;
-        int level = block[j];
-        level = level * qmat[j];
+        int level = block[j] * qmat[j];
  
-//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
-//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
          if(((unsigned)(level+threshold1))>threshold2){
-            if(level>0){
-                level= (bias + level)>>QMAT_SHIFT;
-                coeff[0][k]= level;
-                coeff[1][k]= level-1;
-//                coeff[2][k]= level-2;
+            last_non_zero = i;
+            break;
+        }
+    }
+
+    for(i=start_i; i<=last_non_zero; i++) {
+        const int j = scantable[i];
+        int level = block[j] * qmat[j];
+
+//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
+//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
+        if(((unsigned)(level+threshold1))>threshold2){
+            if(level>0){
+                level= (bias + level)>>QMAT_SHIFT;
+                coeff[0][i]= level;
+                coeff[1][i]= level-1;
+//                coeff[2][k]= level-2;
              }else{
                  level= (bias - level)>>QMAT_SHIFT;
-                coeff[0][k]= -level;
-                coeff[1][k]= -level+1;
+                coeff[0][i]= -level;
+                coeff[1][i]= -level+1;
  //                coeff[2][k]= -level+2;
              }
-            coeff_count[k]= FFMIN(level, 2);
-            assert(coeff_count[k]);
+            coeff_count[i]= FFMIN(level, 2);
+            assert(coeff_count[i]);
              max |=level;
-            last_non_zero = i;
          }else{
-            coeff[0][k]= (level>>31)|1;
-            coeff_count[k]= 1;
+            coeff[0][i]= (level>>31)|1;
+            coeff_count[i]= 1;
          }
      }
      
@@ -4398,73 +4773,55 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
          return last_non_zero;
      }
  
-    score_tab[0]= 0;
+    score_tab[start_i]= 0;
+    survivor[0]= start_i;
+    survivor_count= 1;
      
-    if(patch_table){
-//        length[UNI_AC_ENC_INDEX(0, 63)]=
-//        length[UNI_AC_ENC_INDEX(0, 65)]= 2;
-    }
-
-    for(i=0; i<=last_non_zero - start_i; i++){
-        int level_index, run, j;
-        const int dct_coeff= block[ scantable[i + start_i] ];
+    for(i=start_i; i<=last_non_zero; i++){
+        int level_index, j;
+        const int dct_coeff= ABS(block[ scantable[i] ]);
          const int zero_distoration= dct_coeff*dct_coeff;
          int best_score=256*256*256*120;
-
-        last_score += zero_distoration;
-        not_coded_score += zero_distoration;
          for(level_index=0; level_index < coeff_count[i]; level_index++){
              int distoration;
              int level= coeff[level_index][i];
+            const int alevel= ABS(level);
              int unquant_coeff;
              
              assert(level);
  
              if(s->out_format == FMT_H263){
-                if(level>0){
-                    unquant_coeff= level*qmul + qadd;
-                }else{
-                    unquant_coeff= level*qmul - qadd;
-                }
+                unquant_coeff= alevel*qmul + qadd;
              }else{ //MPEG1
-                j= s->dsp.idct_permutation[ scantable[i + start_i] ]; //FIXME optimize
+                j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
                  if(s->mb_intra){
-                    if (level < 0) {
-                        unquant_coeff = (int)((-level) * qscale * s->intra_matrix[j]) >> 3;
-                        unquant_coeff = -((unquant_coeff - 1) | 1);
-                    } else {
-                        unquant_coeff = (int)(  level  * qscale * s->intra_matrix[j]) >> 3;
+                        unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
                          unquant_coeff =   (unquant_coeff - 1) | 1;
-                    }
                  }else{
-                    if (level < 0) {
-                        unquant_coeff = ((((-level) << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
-                        unquant_coeff = -((unquant_coeff - 1) | 1);
-                    } else {
-                        unquant_coeff = (((  level  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
+                        unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
                          unquant_coeff =   (unquant_coeff - 1) | 1;
-                    }
                  }
                  unquant_coeff<<= 3;
              }
  
-            distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff);
+            distoration= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distoration;
              level+=64;
              if((level&(~127)) == 0){
-                for(run=0; run<=i - left_limit; run++){
+                for(j=survivor_count-1; j>=0; j--){
+                    int run= i - survivor[j];
                      int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                      score += score_tab[i-run];
                      
                      if(score < best_score){
-                        best_score= 
-                        score_tab[i+1]= score;
+                        best_score= score;
                          run_tab[i+1]= run;
                          level_tab[i+1]= level-64;
                      }
                  }
  
                  if(s->out_format == FMT_H263){
-                    for(run=0; run<=i - left_limit; run++){
+                    for(j=survivor_count-1; j>=0; j--){
+                        int run= i - survivor[j];
                          int score= distoration + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                          score += score_tab[i-run];
                          if(score < last_score){
@@ -4477,19 +4834,20 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
                  }
              }else{
                  distoration += esc_length*lambda;
-                for(run=0; run<=i - left_limit; run++){
+                for(j=survivor_count-1; j>=0; j--){
+                    int run= i - survivor[j];
                      int score= distoration + score_tab[i-run];
                      
                      if(score < best_score){
-                        best_score= 
-                        score_tab[i+1]= score;
+                        best_score= score;
                          run_tab[i+1]= run;
                          level_tab[i+1]= level-64;
                      }
                  }
  
                  if(s->out_format == FMT_H263){
-                    for(run=0; run<=i - left_limit; run++){
+                  for(j=survivor_count-1; j>=0; j--){
+                        int run= i - survivor[j];
                          int score= distoration + score_tab[i-run];
                          if(score < last_score){
                              last_score= score;
@@ -4501,26 +4859,28 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
                  }
              }
          }
-
-        for(j=left_limit; j<=i; j++){
-            score_tab[j] += zero_distoration;
-        }
-        score_limit+= zero_distoration;
-        if(score_tab[i+1] < score_limit)
-            score_limit= score_tab[i+1];
          
+        score_tab[i+1]= best_score;
+
          //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
-        while(score_tab[ left_limit ] > score_limit + lambda) left_limit++;
-    
-        if(patch_table){
-//            length[UNI_AC_ENC_INDEX(0, 63)]=
-//            length[UNI_AC_ENC_INDEX(0, 65)]= 3;
+        if(last_non_zero <= 27){
+            for(; survivor_count; survivor_count--){
+                if(score_tab[ survivor[survivor_count-1] ] <= best_score)
+                    break;
+            }
+        }else{
+            for(; survivor_count; survivor_count--){
+                if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
+                    break;
+            }
          }
+
+        survivor[ survivor_count++ ]= i+1;
      }
  
      if(s->out_format != FMT_H263){
          last_score= 256*256*256*120;
-        for(i= left_limit; i<=last_non_zero - start_i + 1; i++){
+        for(i= survivor[0]; i<=last_non_zero + 1; i++){
              int score= score_tab[i];
              if(i) score += lambda*2; //FIXME exacter?
  
@@ -4533,10 +4893,10 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
          }
      }
  
-    s->coded_score[n] = last_score - not_coded_score;
+    s->coded_score[n] = last_score;
      
-    dc= block[0];
-    last_non_zero= last_i - 1 + start_i;
+    dc= ABS(block[0]);
+    last_non_zero= last_i - 1;
      memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
      
      if(last_non_zero < start_i)
@@ -4548,32 +4908,22 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
          
          for(i=0; i<coeff_count[0]; i++){
              int level= coeff[i][0];
-            int unquant_coeff, score, distoration;
+            int alevel= ABS(level);
+            int unquant_coeff, score, distortion;
  
              if(s->out_format == FMT_H263){
-                if(level>0){
-                    unquant_coeff= (level*qmul + qadd)>>3;
-                }else{
-                    unquant_coeff= (level*qmul - qadd)>>3;
-                }
+                    unquant_coeff= (alevel*qmul + qadd)>>3;
              }else{ //MPEG1
-                    if (level < 0) {
-                        unquant_coeff = ((((-level) << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
-                        unquant_coeff = -((unquant_coeff - 1) | 1);
-                    } else {
-                        unquant_coeff = (((  level  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
-                        unquant_coeff =   (unquant_coeff - 1) | 1;
-                    }
+                    unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
+                    unquant_coeff =   (unquant_coeff - 1) | 1;
              }
              unquant_coeff = (unquant_coeff + 4) >> 3;
              unquant_coeff<<= 3 + 3;
  
-            distoration= (unquant_coeff - dc) * (unquant_coeff - dc);
+            distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
              level+=64;
-            if((level&(~127)) == 0)
-                score= distoration + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
-            else
-                score= distoration + esc_length*lambda;
+            if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
+            else                    score= distortion + esc_length*lambda;
  
              if(score < best_score){
                  best_score= score;
@@ -4588,16 +4938,408 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
  
      i= last_i;
      assert(last_level);
-//FIXME use permutated scantable
-    block[ s->dsp.idct_permutation[ scantable[last_non_zero] ] ]= last_level;
+
+    block[ perm_scantable[last_non_zero] ]= last_level;
      i -= last_run + 1;
      
-    for(;i>0 ; i -= run_tab[i] + 1){
-        const int j= s->dsp.idct_permutation[ scantable[i - 1 + start_i] ];
+    for(; i>start_i; i -= run_tab[i] + 1){
+        block[ perm_scantable[i-1] ]= level_tab[i];
+    }
+
+    return last_non_zero;
+}
+
+#define BASIS_SHIFT 16
+#define RECON_SHIFT 6
+//#define REFINE_STATS 1
+static int16_t basis[64][64];
+
+static void build_basis(uint8_t *perm){
+    int i, j, x, y;
+    emms_c();
+    for(i=0; i<8; i++){
+        for(j=0; j<8; j++){
+            for(y=0; y<8; y++){
+                for(x=0; x<8; x++){
+                    double s= 0.25*(1<<BASIS_SHIFT);
+                    int index= 8*i + j;
+                    int perm_index= perm[index];
+                    if(i==0) s*= sqrt(0.5);
+                    if(j==0) s*= sqrt(0.5);
+                    basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
+                }
+            }
+        }
+    }
+}
+
+static int try_basis(int16_t rem[64], int16_t weight[64], int index, int scale){
+    int i;
+    unsigned int sum=0;
+
+    for(i=0; i<8*8; i++){
+        int b= rem[i] - ((basis[index][i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
+        int w= weight[i];
+        b= (b + (1<<(RECON_SHIFT-1))) >> RECON_SHIFT;
+        assert(-512<b && b<512);
+
+        sum += (w*b)*(w*b)>>4;
+    }
+    return sum>>2;
+}
+
+static void add_basis(int16_t rem[64], int index, int scale){
+    int i;
+
+    for(i=0; i<8*8; i++){
+        rem[i] -= (basis[index][i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
+    }    
+}
+
+static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
+                        DCTELEM *block, int16_t *weight, DCTELEM *orig,
+                        int n, int qscale){
+    int16_t rem[64];
+    const int *qmat;
+    const uint8_t *scantable= s->intra_scantable.scantable;
+    const uint8_t *perm_scantable= s->intra_scantable.permutated;
+//    unsigned int threshold1, threshold2;
+//    int bias=0;
+    int run_tab[65];
+    int prev_run=0;
+    int prev_level=0;
+    int qmul, qadd, start_i, last_non_zero, i, dc;
+    const int esc_length= s->ac_esc_length;
+    uint8_t * length;
+    uint8_t * last_length;
+    int lambda;
+    int rle_index, run, q, sum;
+#ifdef REFINE_STATS
+static int count=0;
+static int after_last=0;
+static int to_zero=0;
+static int from_zero=0;
+static int raise=0;
+static int lower=0;
+static int messed_sign=0;
+#endif
+
+    if(basis[0][0] == 0)
+        build_basis(s->dsp.idct_permutation);
      
-        block[j]= level_tab[i];
-        assert(block[j]);
+    qmul= qscale*2;
+    qadd= (qscale-1)|1;
+    if (s->mb_intra) {
+        if (!s->h263_aic) {
+            if (n < 4)
+                q = s->y_dc_scale;
+            else
+                q = s->c_dc_scale;
+        } else{
+            /* For AIC we skip quant/dequant of INTRADC */
+            q = 1;
+            qadd=0;
+        }
+        q <<= RECON_SHIFT-3;
+        /* note: block[0] is assumed to be positive */
+        dc= block[0]*q;
+//        block[0] = (block[0] + (q >> 1)) / q;
+        start_i = 1;
+        qmat = s->q_intra_matrix[qscale];
+//        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
+//            bias= 1<<(QMAT_SHIFT-1);
+        length     = s->intra_ac_vlc_length;
+        last_length= s->intra_ac_vlc_last_length;
+    } else {
+        dc= 0;
+        start_i = 0;
+        qmat = s->q_inter_matrix[qscale];
+        length     = s->inter_ac_vlc_length;
+        last_length= s->inter_ac_vlc_last_length;
+    }
+    last_non_zero = s->block_last_index[n];
+
+#ifdef REFINE_STATS
+{START_TIMER
+#endif
+    for(i=0; i<64; i++){ //FIXME memsetw or similar
+        rem[i]= (orig[i]<<RECON_SHIFT) - dc; //FIXME  use orig dirrectly insteadof copying to rem[]
+    }
+#ifdef REFINE_STATS
+STOP_TIMER("memset rem[]")}
+#endif
+    sum=0;
+    for(i=0; i<64; i++){
+        int one= 36;
+        int qns=4;
+        int w;
+
+        w= ABS(weight[i]) + qns*one;
+        w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
+
+        weight[i] = w;
+//        w=weight[i] = (63*qns + (w/2)) / w;
+         
+        assert(w>0);
+        assert(w<(1<<6));
+        sum += w*w;
+    }
+    lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
+#ifdef REFINE_STATS
+{START_TIMER
+#endif
+    run=0;
+    rle_index=0;
+    for(i=start_i; i<=last_non_zero; i++){
+        int j= perm_scantable[i];
+        const int level= block[j];
+        int coeff;
+        
+        if(level){
+            if(level<0) coeff= qmul*level - qadd;
+            else        coeff= qmul*level + qadd;
+            run_tab[rle_index++]=run;
+            run=0;
+
+            add_basis(rem, j, coeff);
+        }else{
+            run++;
+        }
      }
+#ifdef REFINE_STATS
+if(last_non_zero>0){
+STOP_TIMER("init rem[]")
+}
+}
+
+{START_TIMER
+#endif
+    for(;;){
+        int best_score=try_basis(rem, weight, 0, 0);
+        int nochange_score= best_score;
+        int best_coeff=0;
+        int best_change=0;
+        int run2, best_unquant_change;
+#ifdef REFINE_STATS
+{START_TIMER
+#endif
+        if(start_i){
+            const int level= block[0];
+            int change, old_coeff;
+
+            assert(s->mb_intra);
+            
+            old_coeff= q*level;
+            
+            for(change=-1; change<=1; change+=2){
+                int new_level= level + change;
+                int score, new_coeff;
+                
+                new_coeff= q*new_level;
+                if(new_coeff >= 2048 || new_coeff < 0)
+                    continue;
+
+                score= try_basis(rem, weight, 0, new_coeff - old_coeff);
+                if(score<best_score){
+                    best_score= score;
+                    best_coeff= 0;
+                    best_change= change;
+                    best_unquant_change= new_coeff - old_coeff;
+                }
+            }
+        }
+        
+        run=0;
+        rle_index=0;
+        run2= run_tab[rle_index++];
+        prev_level=0;
+        prev_run=0;
+
+        for(i=start_i; i<64; i++){
+            int j= perm_scantable[i];
+            const int level= block[j];
+            int change, old_coeff;
+
+            if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
+                break;
+
+            if(level){
+                if(level<0) old_coeff= qmul*level - qadd;
+                else        old_coeff= qmul*level + qadd;
+                run2= run_tab[rle_index++]; //FIXME ! maybe after last
+            }else{
+                old_coeff=0;
+                run2--;
+                assert(run2>=0 || i >= last_non_zero );
+            }
+            
+            for(change=-1; change<=1; change+=2){
+                int new_level= level + change;
+                int score, new_coeff, unquant_change;
+                
+                score=0;
+                if(s->avctx->quantizer_noise_shaping < 2 && ABS(new_level) > ABS(level))
+                   continue;
+
+                if(new_level){
+                    if(new_level<0) new_coeff= qmul*new_level - qadd;
+                    else            new_coeff= qmul*new_level + qadd;
+                    if(new_coeff >= 2048 || new_coeff <= -2048)
+                        continue;
+                    //FIXME check for overflow
+                    
+                    if(level){
+                        if(level < 63 && level > -63){
+                            if(i < last_non_zero)
+                                score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
+                                         - length[UNI_AC_ENC_INDEX(run, level+64)];
+                            else
+                                score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
+                                         - last_length[UNI_AC_ENC_INDEX(run, level+64)];
+                        }
+                    }else{
+                        assert(ABS(new_level)==1);
+                        if(i < last_non_zero){
+                            int next_i= i + run2 + 1;
+                            int next_level= block[ perm_scantable[next_i] ] + 64;
+                            
+                            if(next_level&(~127))
+                                next_level= 0;
+
+                            if(next_i < last_non_zero)
+                                score +=   length[UNI_AC_ENC_INDEX(run, 65)]
+                                         + length[UNI_AC_ENC_INDEX(run2, next_level)]
+                                         - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
+                            else
+                                score +=  length[UNI_AC_ENC_INDEX(run, 65)]
+                                        + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
+                                        - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
+                        }else{
+                            score += last_length[UNI_AC_ENC_INDEX(run, 65)];
+                            if(prev_level){
+                                score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
+                                        - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
+                            }
+                        }
+                    }
+                }else{
+                    new_coeff=0;
+                    assert(ABS(level)==1);
+
+                    if(i < last_non_zero){
+                        int next_i= i + run2 + 1;
+                        int next_level= block[ perm_scantable[next_i] ] + 64;
+                            
+                        if(next_level&(~127))
+                            next_level= 0;
+
+                        if(next_i < last_non_zero)
+                            score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
+                                     - length[UNI_AC_ENC_INDEX(run2, next_level)]
+                                     - length[UNI_AC_ENC_INDEX(run, 65)];
+                        else
+                            score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
+                                     - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
+                                     - length[UNI_AC_ENC_INDEX(run, 65)];
+                    }else{
+                        score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
+                        if(prev_level){
+                            score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
+                                    - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
+                        }
+                    }
+                }
+                
+                score *= lambda;
+
+                unquant_change= new_coeff - old_coeff;
+                assert((score < 100*lambda && score > -100*lambda) || lambda==0);
+                
+                score+= try_basis(rem, weight, j, unquant_change);
+                if(score<best_score){
+                    best_score= score;
+                    best_coeff= i;
+                    best_change= change;
+                    best_unquant_change= unquant_change;
+                }
+            }
+            if(level){
+                prev_level= level + 64;
+                if(prev_level&(~127))
+                    prev_level= 0;
+                prev_run= run;
+                run=0;
+            }else{
+                run++;
+            }
+        }
+#ifdef REFINE_STATS
+STOP_TIMER("iterative step")}
+#endif
+
+        if(best_change){
+            int j= perm_scantable[ best_coeff ];
+            
+            block[j] += best_change;
+            
+            if(best_coeff > last_non_zero){
+                last_non_zero= best_coeff;
+                assert(block[j]);
+#ifdef REFINE_STATS
+after_last++;
+#endif
+            }else{
+#ifdef REFINE_STATS
+if(block[j]){
+    if(block[j] - best_change){
+        if(ABS(block[j]) > ABS(block[j] - best_change)){
+            raise++;
+        }else{
+            lower++;
+        }
+    }else{
+        from_zero++;
+    }
+}else{
+    to_zero++;
+}
+#endif
+                for(; last_non_zero>=start_i; last_non_zero--){
+                    if(block[perm_scantable[last_non_zero]])
+                        break;
+                }
+            }
+#ifdef REFINE_STATS
+count++;
+if(256*256*256*64 % count == 0){
+    printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
+}
+#endif
+            run=0;
+            rle_index=0;
+            for(i=start_i; i<=last_non_zero; i++){
+                int j= perm_scantable[i];
+                const int level= block[j];
+        
+                 if(level){
+                     run_tab[rle_index++]=run;
+                     run=0;
+                 }else{
+                     run++;
+                 }
+            }
+            
+            add_basis(rem, j, best_unquant_change);
+        }else{
+            break;
+        }
+    }
+#ifdef REFINE_STATS
+if(last_non_zero>0){
+STOP_TIMER("iterative search")
+}
+}
+#endif
  
      return last_non_zero;
  }
@@ -4606,7 +5348,7 @@ static int dct_quantize_c(MpegEncContext *s,
                          DCTELEM *block, int n,
                          int qscale, int *overflow)
  {
-    int i, j, level, last_non_zero, q;
+    int i, j, level, last_non_zero, q, start_i;
      const int *qmat;
      const uint8_t *scantable= s->intra_scantable.scantable;
      int bias;
@@ -4616,7 +5358,7 @@ static int dct_quantize_c(MpegEncContext *s,
      s->dsp.fdct (block);
  
      if(s->dct_error_sum)
-        ff_denoise_dct(s, block);
+        s->denoise_dct(s, block);
  
      if (s->mb_intra) {
          if (!s->h263_aic) {
@@ -4631,23 +5373,32 @@ static int dct_quantize_c(MpegEncContext *s,
              
          /* note: block[0] is assumed to be positive */
          block[0] = (block[0] + (q >> 1)) / q;
-        i = 1;
+        start_i = 1;
          last_non_zero = 0;
          qmat = s->q_intra_matrix[qscale];
          bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
      } else {
-        i = 0;
+        start_i = 0;
          last_non_zero = -1;
          qmat = s->q_inter_matrix[qscale];
          bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
      }
      threshold1= (1<<QMAT_SHIFT) - bias - 1;
      threshold2= (threshold1<<1);
+    for(i=63;i>=start_i;i--) {
+        j = scantable[i];
+        level = block[j] * qmat[j];
  
-    for(;i<64;i++) {
+        if(((unsigned)(level+threshold1))>threshold2){
+            last_non_zero = i;
+            break;
+        }else{
+            block[j]=0;
+        }
+    }
+    for(i=start_i; i<=last_non_zero; i++) {
          j = scantable[i];
-        level = block[j];
-        level = level * qmat[j];
+        level = block[j] * qmat[j];
  
  //        if(   bias+level >= (1<<QMAT_SHIFT)
  //           || bias-level >= (1<<QMAT_SHIFT)){
@@ -4660,7 +5411,6 @@ static int dct_quantize_c(MpegEncContext *s,
                  block[j]= -level;
              }
              max |=level;
-            last_non_zero = i;
          }else{
              block[j]=0;
          }
@@ -4676,7 +5426,7 @@ static int dct_quantize_c(MpegEncContext *s,
  
  #endif //CONFIG_ENCODERS
  
-static void dct_unquantize_mpeg1_c(MpegEncContext *s, 
+static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s, 
                                     DCTELEM *block, int n, int qscale)
  {
      int i, level, nCoeffs;
@@ -4684,62 +5434,60 @@ static void dct_unquantize_mpeg1_c(MpegEncContext *s,
  
      nCoeffs= s->block_last_index[n];
      
-    if (s->mb_intra) {
-        if (n < 4) 
-            block[0] = block[0] * s->y_dc_scale;
-        else
-            block[0] = block[0] * s->c_dc_scale;
-        /* XXX: only mpeg1 */
-        quant_matrix = s->intra_matrix;
-        for(i=1;i<=nCoeffs;i++) {
-            int j= s->intra_scantable.permutated[i];
-            level = block[j];
-            if (level) {
-                if (level < 0) {
-                    level = -level;
-                    level = (int)(level * qscale * quant_matrix[j]) >> 3;
-                    level = (level - 1) | 1;
-                    level = -level;
-                } else {
-                    level = (int)(level * qscale * quant_matrix[j]) >> 3;
-                    level = (level - 1) | 1;
-                }
-#ifdef PARANOID
-                if (level < -2048 || level > 2047)
-                    fprintf(stderr, "unquant error %d %d\n", i, level);
-#endif
-                block[j] = level;
+    if (n < 4) 
+        block[0] = block[0] * s->y_dc_scale;
+    else
+        block[0] = block[0] * s->c_dc_scale;
+    /* XXX: only mpeg1 */
+    quant_matrix = s->intra_matrix;
+    for(i=1;i<=nCoeffs;i++) {
+        int j= s->intra_scantable.permutated[i];
+        level = block[j];
+        if (level) {
+            if (level < 0) {
+                level = -level;
+                level = (int)(level * qscale * quant_matrix[j]) >> 3;
+                level = (level - 1) | 1;
+                level = -level;
+            } else {
+                level = (int)(level * qscale * quant_matrix[j]) >> 3;
+                level = (level - 1) | 1;
              }
+            block[j] = level;
          }
-    } else {
-        i = 0;
-        quant_matrix = s->inter_matrix;
-        for(;i<=nCoeffs;i++) {
-            int j= s->intra_scantable.permutated[i];
-            level = block[j];
-            if (level) {
-                if (level < 0) {
-                    level = -level;
-                    level = (((level << 1) + 1) * qscale *
-                             ((int) (quant_matrix[j]))) >> 4;
-                    level = (level - 1) | 1;
-                    level = -level;
-                } else {
-                    level = (((level << 1) + 1) * qscale *
-                             ((int) (quant_matrix[j]))) >> 4;
-                    level = (level - 1) | 1;
-                }
-#ifdef PARANOID
-                if (level < -2048 || level > 2047)
-                    fprintf(stderr, "unquant error %d %d\n", i, level);
-#endif
-                block[j] = level;
+    }
+}
+
+static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s, 
+                                   DCTELEM *block, int n, int qscale)
+{
+    int i, level, nCoeffs;
+    const uint16_t *quant_matrix;
+
+    nCoeffs= s->block_last_index[n];
+    
+    quant_matrix = s->inter_matrix;
+    for(i=0; i<=nCoeffs; i++) {
+        int j= s->intra_scantable.permutated[i];
+        level = block[j];
+        if (level) {
+            if (level < 0) {
+                level = -level;
+                level = (((level << 1) + 1) * qscale *
+                         ((int) (quant_matrix[j]))) >> 4;
+                level = (level - 1) | 1;
+                level = -level;
+            } else {
+                level = (((level << 1) + 1) * qscale *
+                         ((int) (quant_matrix[j]))) >> 4;
+                level = (level - 1) | 1;
              }
+            block[j] = level;
          }
      }
  }
  
-static void dct_unquantize_mpeg2_c(MpegEncContext *s, 
+static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s, 
                                     DCTELEM *block, int n, int qscale)
  {
      int i, level, nCoeffs;
@@ -4748,61 +5496,96 @@ static void dct_unquantize_mpeg2_c(MpegEncContext *s,
      if(s->alternate_scan) nCoeffs= 63;
      else nCoeffs= s->block_last_index[n];
      
-    if (s->mb_intra) {
+    if (n < 4) 
+        block[0] = block[0] * s->y_dc_scale;
+    else
+        block[0] = block[0] * s->c_dc_scale;
+    quant_matrix = s->intra_matrix;
+    for(i=1;i<=nCoeffs;i++) {
+        int j= s->intra_scantable.permutated[i];
+        level = block[j];
+        if (level) {
+            if (level < 0) {
+                level = -level;
+                level = (int)(level * qscale * quant_matrix[j]) >> 3;
+                level = -level;
+            } else {
+                level = (int)(level * qscale * quant_matrix[j]) >> 3;
+            }
+            block[j] = level;
+        }
+    }
+}
+
+static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s, 
+                                   DCTELEM *block, int n, int qscale)
+{
+    int i, level, nCoeffs;
+    const uint16_t *quant_matrix;
+    int sum=-1;
+
+    if(s->alternate_scan) nCoeffs= 63;
+    else nCoeffs= s->block_last_index[n];
+    
+    quant_matrix = s->inter_matrix;
+    for(i=0; i<=nCoeffs; i++) {
+        int j= s->intra_scantable.permutated[i];
+        level = block[j];
+        if (level) {
+            if (level < 0) {
+                level = -level;
+                level = (((level << 1) + 1) * qscale *
+                         ((int) (quant_matrix[j]))) >> 4;
+                level = -level;
+            } else {
+                level = (((level << 1) + 1) * qscale *
+                         ((int) (quant_matrix[j]))) >> 4;
+            }
+            block[j] = level;
+            sum+=level;
+        }
+    }
+    block[63]^=sum&1;
+}
+
+static void dct_unquantize_h263_intra_c(MpegEncContext *s, 
+                                  DCTELEM *block, int n, int qscale)
+{
+    int i, level, qmul, qadd;
+    int nCoeffs;
+    
+    assert(s->block_last_index[n]>=0);
+    
+    qmul = qscale << 1;
+    
+    if (!s->h263_aic) {
          if (n < 4) 
              block[0] = block[0] * s->y_dc_scale;
          else
              block[0] = block[0] * s->c_dc_scale;
-        quant_matrix = s->intra_matrix;
-        for(i=1;i<=nCoeffs;i++) {
-            int j= s->intra_scantable.permutated[i];
-            level = block[j];
-            if (level) {
-                if (level < 0) {
-                    level = -level;
-                    level = (int)(level * qscale * quant_matrix[j]) >> 3;
-                    level = -level;
-                } else {
-                    level = (int)(level * qscale * quant_matrix[j]) >> 3;
-                }
-#ifdef PARANOID
-                if (level < -2048 || level > 2047)
-                    fprintf(stderr, "unquant error %d %d\n", i, level);
-#endif
-                block[j] = level;
-            }
-        }
-    } else {
-        int sum=-1;
-        i = 0;
-        quant_matrix = s->inter_matrix;
-        for(;i<=nCoeffs;i++) {
-            int j= s->intra_scantable.permutated[i];
-            level = block[j];
-            if (level) {
-                if (level < 0) {
-                    level = -level;
-                    level = (((level << 1) + 1) * qscale *
-                             ((int) (quant_matrix[j]))) >> 4;
-                    level = -level;
-                } else {
-                    level = (((level << 1) + 1) * qscale *
-                             ((int) (quant_matrix[j]))) >> 4;
-                }
-#ifdef PARANOID
-                if (level < -2048 || level > 2047)
-                    fprintf(stderr, "unquant error %d %d\n", i, level);
-#endif
-                block[j] = level;
-                sum+=level;
+        qadd = (qscale - 1) | 1;
+    }else{
+        qadd = 0;
+    }
+    if(s->ac_pred)
+        nCoeffs=63;
+    else
+        nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
+
+    for(i=1; i<=nCoeffs; i++) {
+        level = block[i];
+        if (level) {
+            if (level < 0) {
+                level = level * qmul - qadd;
+            } else {
+                level = level * qmul + qadd;
              }
+            block[i] = level;
          }
-        block[63]^=sum&1;
      }
  }
  
-
-static void dct_unquantize_h263_c(MpegEncContext *s, 
+static void dct_unquantize_h263_inter_c(MpegEncContext *s, 
                                    DCTELEM *block, int n, int qscale)
  {
      int i, level, qmul, qadd;
@@ -4813,22 +5596,9 @@ static void dct_unquantize_h263_c(MpegEncContext *s,
      qadd = (qscale - 1) | 1;
      qmul = qscale << 1;
      
-    if (s->mb_intra) {
-        if (!s->h263_aic) {
-            if (n < 4) 
-                block[0] = block[0] * s->y_dc_scale;
-            else
-                block[0] = block[0] * s->c_dc_scale;
-        }else
-            qadd = 0;
-        i = 1;
-        nCoeffs= 63; //does not allways use zigzag table 
-    } else {
-        i = 0;
-        nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
-    }
+    nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
  
-    for(;i<=nCoeffs;i++) {
+    for(i=0; i<=nCoeffs; i++) {
          level = block[i];
          if (level) {
              if (level < 0) {
@@ -4836,16 +5606,11 @@ static void dct_unquantize_h263_c(MpegEncContext *s,
              } else {
                  level = level * qmul + qadd;
              }
-#ifdef PARANOID
-                if (level < -2048 || level > 2047)
-                    fprintf(stderr, "unquant error %d %d\n", i, level);
-#endif
              block[i] = level;
          }
      }
  }
  
-
  static const AVOption mpeg4_options[] =
  {
      AVOPTION_CODEC_INT("bitrate", "desired video bitrate", bit_rate, 4, 240000000, 800000),
@@ -4921,29 +5686,7 @@ static const AVOption mpeg4_options[] =
  };
  
  #ifdef CONFIG_ENCODERS
-
-AVCodec mpeg1video_encoder = {
-    "mpeg1video",
-    CODEC_TYPE_VIDEO,
-    CODEC_ID_MPEG1VIDEO,
-    sizeof(MpegEncContext),
-    MPV_encode_init,
-    MPV_encode_picture,
-    MPV_encode_end,
-};
-
  #ifdef CONFIG_RISKY
-
-AVCodec mpeg2video_encoder = {
-    "mpeg2video",
-    CODEC_TYPE_VIDEO,
-    CODEC_ID_MPEG2VIDEO,
-    sizeof(MpegEncContext),
-    MPV_encode_init,
-    MPV_encode_picture,
-    MPV_encode_end,
-};
-
  AVCodec h263_encoder = {
      "h263",
      CODEC_TYPE_VIDEO,