*
* 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
*/
+
+#include <ctype.h>
#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"
+#include "simple_idct.h"
#ifdef USE_FASTMEMCPY
#include "fastmemcpy.h"
#endif
+//#undef NDEBUG
+//#include <assert.h>
+
static void encode_picture(MpegEncContext *s, int picture_number);
static void dct_unquantize_mpeg1_c(MpegEncContext *s,
DCTELEM *block, int n, int qscale);
static void draw_edges_c(UINT8 *buf, int wrap, int width, int height, int w);
static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
-int (*dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow)= dct_quantize_c;
void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w)= draw_edges_c;
static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h,
int src_x, int src_y, int w, int h);
static UINT16 default_mv_penalty[MAX_FCODE+1][MAX_MV*2+1];
static UINT8 default_fcode_tab[MAX_MV*2+1];
-extern UINT8 zigzag_end[64];
-
/* default motion estimation */
int motion_estimation_method = ME_EPZS;
-static void convert_matrix(int (*qmat)[64], uint16_t (*qmat16)[64], uint16_t (*qmat16_bias)[64],
- const UINT16 *quant_matrix, int bias)
+static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[64], uint16_t (*qmat16_bias)[64],
+ const UINT16 *quant_matrix, int bias, int qmin, int qmax)
{
int qscale;
- for(qscale=1; qscale<32; qscale++){
+ for(qscale=qmin; qscale<=qmax; qscale++){
int i;
- if (av_fdct == fdct_ifast) {
+ if (s->fdct == ff_jpeg_fdct_islow) {
for(i=0;i<64;i++) {
- const int j= block_permute_op(i);
+ const int j= s->idct_permutation[i];
/* 16 <= qscale * quant_matrix[i] <= 7905 */
/* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */
/* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
/* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
- qmat[qscale][j] = (int)((UINT64_C(1) << (QMAT_SHIFT + 11)) /
+ qmat[qscale][j] = (int)((UINT64_C(1) << QMAT_SHIFT) /
+ (qscale * quant_matrix[j]));
+ }
+ } else if (s->fdct == fdct_ifast) {
+ for(i=0;i<64;i++) {
+ const int j= s->idct_permutation[i];
+ /* 16 <= qscale * quant_matrix[i] <= 7905 */
+ /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */
+ /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
+ /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
+
+ qmat[qscale][j] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
(aanscales[i] * qscale * quant_matrix[j]));
}
} else {
for(i=0;i<64;i++) {
+ const int j= s->idct_permutation[i];
/* We can safely suppose that 16 <= quant_matrix[i] <= 255
So 16 <= qscale * quant_matrix[i] <= 7905
so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
so 32768 >= (1<<19) / (qscale * quant_matrix[i]) >= 67
*/
qmat [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
- qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[block_permute_op(i)]);
+ qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);
if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1;
-
qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]);
}
}
goto fail;\
}\
}
+/*
+static void build_end(void)
+{
+ int lastIndex;
+ int lastIndexAfterPerm=0;
+ for(lastIndex=0; lastIndex<64; lastIndex++)
+ {
+ if(ff_zigzag_direct[lastIndex] > lastIndexAfterPerm)
+ lastIndexAfterPerm= ff_zigzag_direct[lastIndex];
+ zigzag_end[lastIndex]= lastIndexAfterPerm + 1;
+ }
+}
+*/
+/* Initialize a ScanTable from a scan pattern given in raster coefficient
+ * order:
+ *  - permutated[i]: scan position i remapped through s->idct_permutation,
+ *    so the scan addresses coefficients in the IDCT's internal layout
+ *  - raster_end[i]: running maximum of permutated[0..i], i.e. the highest
+ *    permutated index reached after i scan steps (bounds raster accesses
+ *    for partially coded blocks)
+ */
+void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scantable){
+    int i;
+    int end;
+
+    /* apply the IDCT coefficient permutation to every scan position */
+    for(i=0; i<64; i++){
+        int j;
+        j = src_scantable[i];
+        st->permutated[i] = s->idct_permutation[j];
+    }
+
+    /* raster_end[i] = largest permutated index seen up to scan position i */
+    end=-1;
+    for(i=0; i<64; i++){
+        int j;
+        j = st->permutated[i];
+        if(j>end) end=j;
+        st->raster_end[i]= end;
+    }
+}
+
+/* XXX: those functions should be suppressed ASAP when all IDCTs are
+ converted */
+/* Reference integer IDCT (j_rev_dct) then store the clamped result into
+ * dest ("put" semantics: overwrite destination pixels). */
+static void ff_jref_idct_put(UINT8 *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct (block);
+    put_pixels_clamped(block, dest, line_size);
+}
+/* Reference integer IDCT (j_rev_dct) then add the clamped result to
+ * dest ("add" semantics: accumulate onto existing prediction pixels). */
+static void ff_jref_idct_add(UINT8 *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct (block);
+    add_pixels_clamped(block, dest, line_size);
+}
/* init common structure for both encoder and decoder */
int MPV_common_init(MpegEncContext *s)
s->dct_unquantize_h263 = dct_unquantize_h263_c;
s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_c;
s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_c;
+ s->dct_quantize= dct_quantize_c;
+
+ if(s->avctx->dct_algo==FF_DCT_FASTINT)
+ s->fdct = fdct_ifast;
+ else
+ s->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
+
+ if(s->avctx->idct_algo==FF_IDCT_INT){
+ s->idct_put= ff_jref_idct_put;
+ s->idct_add= ff_jref_idct_add;
+ for(i=0; i<64; i++)
+ s->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
+ }else{ //accurate/default
+ s->idct_put= simple_idct_put;
+ s->idct_add= simple_idct_add;
+ for(i=0; i<64; i++)
+ s->idct_permutation[i]= i;
+ }
#ifdef HAVE_MMX
MPV_common_init_mmx(s);
#ifdef ARCH_ALPHA
MPV_common_init_axp(s);
#endif
+#ifdef HAVE_MLIB
+ MPV_common_init_mlib(s);
+#endif
+
+
+    /* load & permute scantables
+       note: only wmv uses different ones
+    */
+ ff_init_scantable(s, &s->inter_scantable , ff_zigzag_direct);
+ ff_init_scantable(s, &s->intra_scantable , ff_zigzag_direct);
+ ff_init_scantable(s, &s->intra_h_scantable, ff_alternate_horizontal_scan);
+ ff_init_scantable(s, &s->intra_v_scantable, ff_alternate_vertical_scan);
s->mb_width = (s->width + 15) / 16;
s->mb_height = (s->height + 15) / 16;
/* set default edge pos, will be overriden in decode_header if needed */
s->h_edge_pos= s->mb_width*16;
s->v_edge_pos= s->mb_height*16;
+
+ /* convert fourcc to upper case */
+ s->avctx->fourcc= toupper( s->avctx->fourcc &0xFF)
+ + (toupper((s->avctx->fourcc>>8 )&0xFF)<<8 )
+ + (toupper((s->avctx->fourcc>>16)&0xFF)<<16)
+ + (toupper((s->avctx->fourcc>>24)&0xFF)<<24);
s->mb_num = s->mb_width * s->mb_height;
if(!(s->flags&CODEC_FLAG_DR1)){
CHECKED_ALLOCZ(s->mb_var , s->mb_num * sizeof(INT16))
CHECKED_ALLOCZ(s->mc_mb_var, s->mb_num * sizeof(INT16))
+ CHECKED_ALLOCZ(s->mb_mean , s->mb_num * sizeof(INT8))
/* Allocate MV tables */
CHECKED_ALLOCZ(s->p_mv_table , mv_table_size * 2 * sizeof(INT16))
CHECKED_ALLOCZ(s->tex_pb_buffer, PB_BUFFER_SIZE);
CHECKED_ALLOCZ( s->pb2_buffer, PB_BUFFER_SIZE);
}
+
+ if(s->msmpeg4_version){
+ CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
+ }
+ CHECKED_ALLOCZ(s->avctx->stats_out, 256);
}
if (s->out_format == FMT_H263 || s->encoding) {
/* MV prediction */
size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
CHECKED_ALLOCZ(s->motion_val, size * 2 * sizeof(INT16));
-
- /* 4mv direct mode decoding table */
- CHECKED_ALLOCZ(s->non_b_mv4_table, size * sizeof(UINT8))
+ }
+
+ if(s->codec_id==CODEC_ID_MPEG4){
+ /* 4mv and interlaced direct mode decoding tables */
+ CHECKED_ALLOCZ(s->co_located_type_table, s->mb_num * sizeof(UINT8))
+ CHECKED_ALLOCZ(s->field_mv_table, s->mb_num*2*2 * sizeof(INT16))
+ CHECKED_ALLOCZ(s->field_select_table, s->mb_num*2* sizeof(INT8))
}
if (s->h263_pred || s->h263_plus) {
/* cbp values */
CHECKED_ALLOCZ(s->coded_block, y_size);
-
- /* which mb is a intra block */
- CHECKED_ALLOCZ(s->mbintra_table, s->mb_num);
- memset(s->mbintra_table, 1, s->mb_num);
/* divx501 bitstream reorder buffer */
CHECKED_ALLOCZ(s->bitstream_buffer, BITSTREAM_BUFFER_SIZE);
}
CHECKED_ALLOCZ(s->qscale_table , s->mb_num * sizeof(UINT8))
+ /* which mb is a intra block */
+ CHECKED_ALLOCZ(s->mbintra_table, s->mb_num);
+ memset(s->mbintra_table, 1, s->mb_num);
+
/* default structure is frame */
s->picture_structure = PICT_FRAME;
av_freep(&s->mb_type);
av_freep(&s->mb_var);
av_freep(&s->mc_mb_var);
+ av_freep(&s->mb_mean);
av_freep(&s->p_mv_table);
av_freep(&s->b_forw_mv_table);
av_freep(&s->b_back_mv_table);
av_freep(&s->tex_pb_buffer);
av_freep(&s->pb2_buffer);
av_freep(&s->edge_emu_buffer);
- av_freep(&s->non_b_mv4_table);
-
+ av_freep(&s->co_located_type_table);
+ av_freep(&s->field_mv_table);
+ av_freep(&s->field_select_table);
+ av_freep(&s->avctx->stats_out);
+ av_freep(&s->ac_stats);
+
for(i=0;i<3;i++) {
int j;
if(!(s->flags&CODEC_FLAG_DR1)){
s->max_qdiff= avctx->max_qdiff;
s->qcompress= avctx->qcompress;
s->qblur= avctx->qblur;
- s->b_quant_factor= avctx->b_quant_factor;
- s->b_quant_offset= avctx->b_quant_offset;
s->avctx = avctx;
s->aspect_ratio_info= avctx->aspect_ratio_info;
+ if (avctx->aspect_ratio_info == FF_ASPECT_EXTENDED)
+ {
+ s->aspected_width = avctx->aspected_width;
+ s->aspected_height = avctx->aspected_height;
+ }
s->flags= avctx->flags;
s->max_b_frames= avctx->max_b_frames;
- s->rc_strategy= avctx->rc_strategy;
s->b_frame_strategy= avctx->b_frame_strategy;
s->codec_id= avctx->codec->id;
s->luma_elim_threshold = avctx->luma_elim_threshold;
} else {
s->intra_only = 0;
}
-
+
/* ME algorithm */
if (avctx->me_method == 0)
/* For compatibility */
s->me_method = motion_estimation_method;
else
s->me_method = avctx->me_method;
-
+
/* Fixed QSCALE */
s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE);
+ s->adaptive_quant= ( s->avctx->lumi_masking
+ || s->avctx->dark_masking
+ || s->avctx->temporal_cplx_masking
+ || s->avctx->spatial_cplx_masking
+ || s->avctx->p_masking)
+ && !s->fixed_qscale;
+
+ s->progressive_sequence= !(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
+
switch(avctx->codec->id) {
case CODEC_ID_MPEG1VIDEO:
s->out_format = FMT_MPEG1;
s->y_dc_scale_table=
s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
- if (s->out_format == FMT_H263)
- h263_encode_init(s);
- else if (s->out_format == FMT_MPEG1)
- ff_mpeg1_encode_init(s);
- if(s->msmpeg4_version)
- ff_msmpeg4_encode_init(s);
-
/* dont use mv_penalty table for crap MV as it would be confused */
if (s->me_method < ME_EPZS) s->mv_penalty = default_mv_penalty;
if (MPV_common_init(s) < 0)
return -1;
+ if (s->out_format == FMT_H263)
+ h263_encode_init(s);
+ else if (s->out_format == FMT_MPEG1)
+ ff_mpeg1_encode_init(s);
+ if(s->msmpeg4_version)
+ ff_msmpeg4_encode_init(s);
+
/* init default q matrix */
for(i=0;i<64;i++) {
+ int j= s->idct_permutation[i];
if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
- s->intra_matrix[i] = ff_mpeg4_default_intra_matrix[i];
- s->inter_matrix[i] = ff_mpeg4_default_non_intra_matrix[i];
+ s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
+ s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
}else if(s->out_format == FMT_H263){
- s->intra_matrix[i] =
- s->inter_matrix[i] = ff_mpeg1_default_non_intra_matrix[i];
+ s->intra_matrix[j] =
+ s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
}else{ /* mpeg1 */
- s->intra_matrix[i] = ff_mpeg1_default_intra_matrix[i];
- s->inter_matrix[i] = ff_mpeg1_default_non_intra_matrix[i];
+ s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
+ s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
}
}
/* precompute matrix */
/* for mjpeg, we do include qscale in the matrix */
if (s->out_format != FMT_MJPEG) {
- convert_matrix(s->q_intra_matrix, s->q_intra_matrix16, s->q_intra_matrix16_bias,
- s->intra_matrix, s->intra_quant_bias);
- convert_matrix(s->q_inter_matrix, s->q_inter_matrix16, s->q_inter_matrix16_bias,
- s->inter_matrix, s->inter_quant_bias);
+ convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, s->q_intra_matrix16_bias,
+ s->intra_matrix, s->intra_quant_bias, 1, 31);
+ convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16, s->q_inter_matrix16_bias,
+ s->inter_matrix, s->inter_quant_bias, 1, 31);
}
if(ff_rate_control_init(s) < 0)
}
/* draw the edges of width 'w' of an image of size width, height */
+//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(UINT8 *buf, int wrap, int width, int height, int w)
{
UINT8 *ptr, *last_line;
avctx->dr_opaque_frame= s->next_dr_opaque;
}
}
-
    /* set the dequantizer; we can't do it during init as it might change for
       mpeg4, and we can't do it in the header decode as init isn't called for
       mpeg4 there yet */
if(s->out_format == FMT_H263){
/* generic function for encode/decode called after a frame has been coded/decoded */
void MPV_frame_end(MpegEncContext *s)
{
-// if((s->picture_number%100)==0 && s->encoding) printf("sads:%d //\n", sads);
+ s->avctx->key_frame = (s->pict_type == I_TYPE);
+ s->avctx->pict_type = s->pict_type;
/* draw edge for correct motion prediction if outside */
if (s->pict_type != B_TYPE && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
}
emms_c();
+ s->last_pict_type = s->pict_type;
if(s->pict_type!=B_TYPE){
s->last_non_b_pict_type= s->pict_type;
- s->last_non_b_qscale= s->qscale;
- s->last_non_b_mc_mb_var= s->mc_mb_var_sum;
s->num_available_buffers++;
if(s->num_available_buffers>2) s->num_available_buffers= 2;
}
MPV_frame_start(s, avctx);
encode_picture(s, s->picture_number);
- avctx->key_frame = (s->pict_type == I_TYPE);
- avctx->pict_type = s->pict_type;
+
avctx->real_pict_num = s->picture_number;
avctx->header_bits = s->header_bits;
avctx->mv_bits = s->mv_bits;
avctx->i_tex_bits = s->i_tex_bits;
avctx->p_tex_bits = s->p_tex_bits;
avctx->i_count = s->i_count;
- avctx->p_count = s->p_count;
+ avctx->p_count = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
avctx->skip_count = s->skip_count;
MPV_frame_end(s);
if (s->out_format == FMT_MJPEG)
mjpeg_picture_trailer(s);
- avctx->quality = s->qscale;
+ if(!s->fixed_qscale)
+ avctx->quality = s->qscale;
if(s->flags&CODEC_FLAG_PASS1)
ff_write_pass1_stats(s);
flush_put_bits(&s->pb);
s->frame_bits = (pbBufPtr(&s->pb) - s->pb.buf) * 8;
- if(s->pict_type==B_TYPE) s->pb_frame_bits+= s->frame_bits;
- else s->pb_frame_bits= s->frame_bits;
-
+
s->total_bits += s->frame_bits;
avctx->frame_bits = s->frame_bits;
//printf("fcode: %d, type: %d, head: %d, mv: %d, misc: %d, frame: %d, itex: %d, ptex: %d\n",
//s->f_code, avctx->key_frame, s->header_bits, s->mv_bits, s->misc_bits, s->frame_bits, s->i_tex_bits, s->p_tex_bits);
+#if 0 //dump some stats to stats.txt for testing/debuging
+if(s->max_b_frames==0)
+{
+ static FILE *f=NULL;
+ if(!f) f= fopen("stats.txt", "wb");
+ get_psnr(pict->data, s->current_picture,
+ pict->linesize, s->linesize, avctx);
+ fprintf(f, "%7d, %7d, %2.4f\n", pbBufPtr(&s->pb) - s->pb.buf, s->qscale, avctx->psnr_y);
+}
+#endif
if (avctx->get_psnr) {
/* At this point pict->data should have the original frame */
UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
int dest_offset,
UINT8 **ref_picture, int src_offset,
- int field_based, op_pixels_func *pix_op,
+ int field_based, op_pixels_func (*pix_op)[4],
int motion_x, int motion_y, int h)
{
UINT8 *ptr;
int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize;
int emu=0;
-
+#if 0
if(s->quarter_sample)
{
motion_x>>=1;
motion_y>>=1;
}
+#endif
dxy = ((motion_y & 1) << 1) | (motion_x & 1);
src_x = s->mb_x * 16 + (motion_x >> 1);
src_y = s->mb_y * (16 >> field_based) + (motion_y >> 1);
emu=1;
}
}
- pix_op[dxy](dest_y, ptr, linesize, h);
- pix_op[dxy](dest_y + 8, ptr + 8, linesize, h);
+ pix_op[0][dxy](dest_y, ptr, linesize, h);
if(s->flags&CODEC_FLAG_GRAY) return;
emulated_edge_mc(s, ptr, uvlinesize, 9, (h>>1)+1, src_x, src_y, s->h_edge_pos>>1, v_edge_pos>>1);
ptr= s->edge_emu_buffer;
}
- pix_op[dxy](dest_cb + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
+ pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
ptr = ref_picture[2] + offset;
if(emu){
emulated_edge_mc(s, ptr, uvlinesize, 9, (h>>1)+1, src_x, src_y, s->h_edge_pos>>1, v_edge_pos>>1);
ptr= s->edge_emu_buffer;
}
- pix_op[dxy](dest_cr + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
+ pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
}
static inline void qpel_motion(MpegEncContext *s,
UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
int dest_offset,
UINT8 **ref_picture, int src_offset,
- int field_based, op_pixels_func *pix_op,
- qpel_mc_func *qpix_op,
+ int field_based, op_pixels_func (*pix_op)[4],
+ qpel_mc_func (*qpix_op)[16],
int motion_x, int motion_y, int h)
{
UINT8 *ptr;
- int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize;
+ int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize;
int emu=0;
dxy = ((motion_y & 3) << 2) | (motion_x & 3);
if (src_y == height)
dxy &= ~12;
linesize = s->linesize << field_based;
+ uvlinesize = s->uvlinesize << field_based;
ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset;
dest_y += dest_offset;
//printf("%d %d %d\n", src_x, src_y, dxy);
emu=1;
}
}
- qpix_op[dxy](dest_y , ptr , linesize, linesize, motion_x&3, motion_y&3);
- qpix_op[dxy](dest_y + 8, ptr + 8, linesize, linesize, motion_x&3, motion_y&3);
- qpix_op[dxy](dest_y + linesize*8 , ptr + linesize*8 , linesize, linesize, motion_x&3, motion_y&3);
- qpix_op[dxy](dest_y + linesize*8 + 8, ptr + linesize*8 + 8, linesize, linesize, motion_x&3, motion_y&3);
-
+ if(!field_based)
+ qpix_op[0][dxy](dest_y, ptr, linesize);
+ else{
+ //damn interlaced mode
+ //FIXME boundary mirroring is not exactly correct here
+ qpix_op[1][dxy](dest_y , ptr , linesize);
+ qpix_op[1][dxy](dest_y+8, ptr+8, linesize);
+ }
+
if(s->flags&CODEC_FLAG_GRAY) return;
- mx= (motion_x>>1) | (motion_x&1);
- my= (motion_y>>1) | (motion_y&1);
+ if(field_based){
+ mx= motion_x/2;
+ my= motion_y>>1;
+ }else if(s->divx_version){
+ mx= (motion_x>>1)|(motion_x&1);
+ my= (motion_y>>1)|(motion_y&1);
+ }else{
+ mx= motion_x/2;
+ my= motion_y/2;
+ }
+ mx= (mx>>1)|(mx&1);
+ my= (my>>1)|(my&1);
+ dxy= (mx&1) | ((my&1)<<1);
+ mx>>=1;
+ my>>=1;
- dxy = 0;
- if ((mx & 3) != 0)
- dxy |= 1;
- if ((my & 3) != 0)
- dxy |= 2;
- mx = mx >> 2;
- my = my >> 2;
-
src_x = s->mb_x * 8 + mx;
src_y = s->mb_y * (8 >> field_based) + my;
src_x = clip(src_x, -8, s->width >> 1);
if (src_y == (height >> 1))
dxy &= ~2;
- offset = (src_y * s->uvlinesize) + src_x + (src_offset >> 1);
+ offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
ptr = ref_picture[1] + offset;
if(emu){
- emulated_edge_mc(s, ptr, s->uvlinesize, 9, (h>>1)+1, src_x, src_y, s->h_edge_pos>>1, v_edge_pos>>1);
+ emulated_edge_mc(s, ptr, uvlinesize, 9, (h>>1)+1, src_x, src_y, s->h_edge_pos>>1, v_edge_pos>>1);
ptr= s->edge_emu_buffer;
}
- pix_op[dxy](dest_cb + (dest_offset >> 1), ptr, s->uvlinesize, h >> 1);
+ pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
ptr = ref_picture[2] + offset;
if(emu){
- emulated_edge_mc(s, ptr, s->uvlinesize, 9, (h>>1)+1, src_x, src_y, s->h_edge_pos>>1, v_edge_pos>>1);
+ emulated_edge_mc(s, ptr, uvlinesize, 9, (h>>1)+1, src_x, src_y, s->h_edge_pos>>1, v_edge_pos>>1);
ptr= s->edge_emu_buffer;
}
- pix_op[dxy](dest_cr + (dest_offset >> 1), ptr, s->uvlinesize, h >> 1);
+ pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
}
static inline void MPV_motion(MpegEncContext *s,
UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
int dir, UINT8 **ref_picture,
- op_pixels_func *pix_op, qpel_mc_func *qpix_op)
+ op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
{
int dxy, offset, mx, my, src_x, src_y, motion_x, motion_y;
int mb_x, mb_y, i;
switch(s->mv_type) {
case MV_TYPE_16X16:
if(s->mcsel){
-#if 0
- mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
- ref_picture, 0,
- 0, pix_op,
- s->sprite_offset[0][0]>>3,
- s->sprite_offset[0][1]>>3,
- 16);
-#else
gmc1_motion(s, dest_y, dest_cb, dest_cr, 0,
ref_picture, 0,
16);
-#endif
- }else if(s->quarter_sample && dir==0){ //FIXME
+ }else if(s->quarter_sample){
qpel_motion(s, dest_y, dest_cb, dest_cr, 0,
ref_picture, 0,
0, pix_op, qpix_op,
}
break;
case MV_TYPE_8X8:
- for(i=0;i<4;i++) {
- motion_x = s->mv[dir][i][0];
- motion_y = s->mv[dir][i][1];
+ mx = 0;
+ my = 0;
+ if(s->quarter_sample){
+ for(i=0;i<4;i++) {
+ motion_x = s->mv[dir][i][0];
+ motion_y = s->mv[dir][i][1];
+
+ dxy = ((motion_y & 3) << 2) | (motion_x & 3);
+ src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
+ src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
+
+                /* WARNING: do not forget half pels */
+ src_x = clip(src_x, -16, s->width);
+ if (src_x == s->width)
+ dxy &= ~3;
+ src_y = clip(src_y, -16, s->height);
+ if (src_y == s->height)
+ dxy &= ~12;
+
+ ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
+ if(s->flags&CODEC_FLAG_EMU_EDGE){
+ if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 8 > s->h_edge_pos
+ || src_y + (motion_y&3) + 8 > s->v_edge_pos){
+ emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
+ ptr= s->edge_emu_buffer;
+ }
+ }
+ dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
+ qpix_op[1][dxy](dest, ptr, s->linesize);
- dxy = ((motion_y & 1) << 1) | (motion_x & 1);
- src_x = mb_x * 16 + (motion_x >> 1) + (i & 1) * 8;
- src_y = mb_y * 16 + (motion_y >> 1) + (i >>1) * 8;
+ mx += s->mv[dir][i][0]/2;
+ my += s->mv[dir][i][1]/2;
+ }
+ }else{
+ for(i=0;i<4;i++) {
+ motion_x = s->mv[dir][i][0];
+ motion_y = s->mv[dir][i][1];
+
+ dxy = ((motion_y & 1) << 1) | (motion_x & 1);
+ src_x = mb_x * 16 + (motion_x >> 1) + (i & 1) * 8;
+ src_y = mb_y * 16 + (motion_y >> 1) + (i >>1) * 8;
- /* WARNING: do no forget half pels */
- src_x = clip(src_x, -16, s->width);
- if (src_x == s->width)
- dxy &= ~1;
- src_y = clip(src_y, -16, s->height);
- if (src_y == s->height)
- dxy &= ~2;
+                /* WARNING: do not forget half pels */
+ src_x = clip(src_x, -16, s->width);
+ if (src_x == s->width)
+ dxy &= ~1;
+ src_y = clip(src_y, -16, s->height);
+ if (src_y == s->height)
+ dxy &= ~2;
- ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
- if(s->flags&CODEC_FLAG_EMU_EDGE){
- if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 8 > s->h_edge_pos
- || src_y + (motion_y&1) + 8 > s->v_edge_pos){
- emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
- ptr= s->edge_emu_buffer;
+ ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
+ if(s->flags&CODEC_FLAG_EMU_EDGE){
+ if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 8 > s->h_edge_pos
+ || src_y + (motion_y&1) + 8 > s->v_edge_pos){
+ emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
+ ptr= s->edge_emu_buffer;
+ }
}
+ dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
+ pix_op[1][dxy](dest, ptr, s->linesize, 8);
+
+ mx += s->mv[dir][i][0];
+ my += s->mv[dir][i][1];
}
- dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
- pix_op[dxy](dest, ptr, s->linesize, 8);
}
-
+
if(s->flags&CODEC_FLAG_GRAY) break;
/* In case of 8X8, we construct a single chroma motion vector
with a special rounding */
- mx = 0;
- my = 0;
for(i=0;i<4;i++) {
- mx += s->mv[dir][i][0];
- my += s->mv[dir][i][1];
}
if (mx >= 0)
mx = (h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1));
emu=1;
}
}
- pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
+ pix_op[1][dxy](dest_cb, ptr, s->uvlinesize, 8);
ptr = ref_picture[2] + offset;
if(emu){
emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
ptr= s->edge_emu_buffer;
}
- pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
+ pix_op[1][dxy](dest_cr, ptr, s->uvlinesize, 8);
break;
case MV_TYPE_FIELD:
if (s->picture_structure == PICT_FRAME) {
- /* top field */
- mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
- ref_picture, s->field_select[dir][0] ? s->linesize : 0,
- 1, pix_op,
- s->mv[dir][0][0], s->mv[dir][0][1], 8);
- /* bottom field */
- mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
- ref_picture, s->field_select[dir][1] ? s->linesize : 0,
- 1, pix_op,
- s->mv[dir][1][0], s->mv[dir][1][1], 8);
+ if(s->quarter_sample){
+ /* top field */
+ qpel_motion(s, dest_y, dest_cb, dest_cr, 0,
+ ref_picture, s->field_select[dir][0] ? s->linesize : 0,
+ 1, pix_op, qpix_op,
+ s->mv[dir][0][0], s->mv[dir][0][1], 8);
+ /* bottom field */
+ qpel_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
+ ref_picture, s->field_select[dir][1] ? s->linesize : 0,
+ 1, pix_op, qpix_op,
+ s->mv[dir][1][0], s->mv[dir][1][1], 8);
+ }else{
+ /* top field */
+ mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
+ ref_picture, s->field_select[dir][0] ? s->linesize : 0,
+ 1, pix_op,
+ s->mv[dir][0][0], s->mv[dir][0][1], 8);
+ /* bottom field */
+ mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
+ ref_picture, s->field_select[dir][1] ? s->linesize : 0,
+ 1, pix_op,
+ s->mv[dir][1][0], s->mv[dir][1][1], 8);
+ }
} else {
{
if (!s->mpeg2)
s->dct_unquantize(s, block, i, s->qscale);
- ff_idct_put (dest, line_size, block);
+ s->idct_put (dest, line_size, block);
}
/* add block[] to dest[] */
DCTELEM *block, int i, UINT8 *dest, int line_size)
{
if (s->block_last_index[i] >= 0) {
- ff_idct_add (dest, line_size, block);
+ s->idct_add (dest, line_size, block);
}
}
if (s->block_last_index[i] >= 0) {
s->dct_unquantize(s, block, i, s->qscale);
- ff_idct_add (dest, line_size, block);
+ s->idct_add (dest, line_size, block);
}
}
/* update motion predictor, not for B-frames as they need the motion_val from the last P/S-Frame */
if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE) { //FIXME move into h263.c if possible, format specific stuff shouldnt be here
- int motion_x, motion_y;
const int wrap = s->block_wrap[0];
const int xy = s->block_index[0];
- if (s->mb_intra) {
- motion_x = 0;
- motion_y = 0;
- goto motion_init;
- } else if (s->mv_type == MV_TYPE_16X16) {
- motion_x = s->mv[0][0][0];
- motion_y = s->mv[0][0][1];
- motion_init:
+ const int mb_index= s->mb_x + s->mb_y*s->mb_width;
+ if(s->mv_type == MV_TYPE_8X8){
+ s->co_located_type_table[mb_index]= CO_LOCATED_TYPE_4MV;
+ } else {
+ int motion_x, motion_y;
+ if (s->mb_intra) {
+ motion_x = 0;
+ motion_y = 0;
+ if(s->co_located_type_table)
+ s->co_located_type_table[mb_index]= 0;
+ } else if (s->mv_type == MV_TYPE_16X16) {
+ motion_x = s->mv[0][0][0];
+ motion_y = s->mv[0][0][1];
+ if(s->co_located_type_table)
+ s->co_located_type_table[mb_index]= 0;
+ } else /*if (s->mv_type == MV_TYPE_FIELD)*/ {
+ int i;
+ motion_x = s->mv[0][0][0] + s->mv[0][1][0];
+ motion_y = s->mv[0][0][1] + s->mv[0][1][1];
+ motion_x = (motion_x>>1) | (motion_x&1);
+ for(i=0; i<2; i++){
+ s->field_mv_table[mb_index][i][0]= s->mv[0][i][0];
+ s->field_mv_table[mb_index][i][1]= s->mv[0][i][1];
+ s->field_select_table[mb_index][i]= s->field_select[0][i];
+ }
+ s->co_located_type_table[mb_index]= CO_LOCATED_TYPE_FIELDMV;
+ }
/* no update if 8X8 because it has been done during parsing */
s->motion_val[xy][0] = motion_x;
s->motion_val[xy][1] = motion_y;
s->motion_val[xy + wrap][1] = motion_y;
s->motion_val[xy + 1 + wrap][0] = motion_x;
s->motion_val[xy + 1 + wrap][1] = motion_y;
- s->non_b_mv4_table[xy]=0;
- } else { /* 8X8 */
- s->non_b_mv4_table[xy]=1;
}
}
if (!(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) {
UINT8 *dest_y, *dest_cb, *dest_cr;
int dct_linesize, dct_offset;
- op_pixels_func *op_pix;
- qpel_mc_func *op_qpix;
+ op_pixels_func (*op_pix)[4];
+ qpel_mc_func (*op_qpix)[16];
/* avoid copy if macroblock skipped in last frame too
dont touch it for B-frames as they need the skip info from the next p-frame */
if((!s->encoding) || (s->mb_type[mb_xy]&(s->mb_type[mb_xy]-1))){
if ((!s->no_rounding) || s->pict_type==B_TYPE){
op_pix = put_pixels_tab;
- op_qpix= qpel_mc_rnd_tab;
+ op_qpix= put_qpel_pixels_tab;
}else{
op_pix = put_no_rnd_pixels_tab;
- op_qpix= qpel_mc_no_rnd_tab;
+ op_qpix= put_no_rnd_qpel_pixels_tab;
}
if (s->mv_dir & MV_DIR_FORWARD) {
MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture, op_pix, op_qpix);
- if ((!s->no_rounding) || s->pict_type==B_TYPE)
- op_pix = avg_pixels_tab;
- else
- op_pix = avg_no_rnd_pixels_tab;
+ op_pix = avg_pixels_tab;
+ op_qpix= avg_qpel_pixels_tab;
}
if (s->mv_dir & MV_DIR_BACKWARD) {
MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture, op_pix, op_qpix);
emms_c(); //FIXME remove
}
-static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold, int skip_dc)
+static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
{
static const char tab[64]=
{3,2,2,1,1,1,1,1,
int i;
DCTELEM *block= s->block[n];
const int last_index= s->block_last_index[n];
+ int skip_dc;
+
+ if(threshold<0){
+ skip_dc=0;
+ threshold= -threshold;
+ }else
+ skip_dc=1;
- if(skip_dc) skip_dc=1;
-
    /* are all coefficients which we could set to zero already zero? */
if(last_index<=skip_dc - 1) return;
for(i=0; i<=last_index; i++){
- const int j = zigzag_direct[i];
+ const int j = s->intra_scantable.permutated[i];
const int level = ABS(block[j]);
if(level==1){
if(skip_dc && i==0) continue;
}
if(score >= threshold) return;
for(i=skip_dc; i<=last_index; i++){
- const int j = zigzag_direct[i];
+ const int j = s->intra_scantable.permutated[i];
block[j]=0;
}
if(block[0]) s->block_last_index[n]= 0;
int i;
const int maxlevel= s->max_qcoeff;
const int minlevel= s->min_qcoeff;
-
- for(i=0;i<=last_index; i++){
- const int j = zigzag_direct[i];
+
+ if(s->mb_intra){
+ i=1; //skip clipping of intra dc
+ }else
+ i=0;
+
+ for(;i<=last_index; i++){
+ const int j= s->intra_scantable.permutated[i];
int level = block[j];
if (level>maxlevel) level=maxlevel;
}
}
+/* Rescale the quantized coefficients of block n from quantizer oldq to
+ * newq using rounded division, then recompute block_last_index[n] by
+ * trimming trailing zeros introduced by the rescale.
+ * NOTE(review): the intra scantable is used for inter blocks too —
+ * presumably the permutation is identical for both; confirm. */
+static inline void requantize_coeffs(MpegEncContext *s, DCTELEM block[64], int oldq, int newq, int n)
+{
+    int i;
+
+    if(s->mb_intra){
+        i=1; //skip requantizing of intra dc
+        //FIXME requantize, note (mpeg1/h263/h263p-aic don't need it,...)
+    }else
+        i=0;
+
+    for(;i<=s->block_last_index[n]; i++){
+        const int j = s->intra_scantable.permutated[i];
+        int level = block[j];
+
+        block[j]= ROUNDED_DIV(level*oldq, newq);
+    }
+
+    /* find the new last nonzero coefficient in scan order */
+    for(i=s->block_last_index[n]; i>=0; i--){
+        const int j = s->intra_scantable.permutated[i];
+        if(block[j]) break;
+    }
+    s->block_last_index[n]= i;
+}
+
+/* Find the smallest quantizer newq > s->qscale at which all coefficients of
+ * the 6 blocks fit into [min_qcoeff, max_qcoeff] after rescaling, then
+ * requantize and clip every block and update s->dquant / s->qscale.
+ * Only meaningful with adaptive quantization enabled (asserted). */
+static inline void auto_requantize_coeffs(MpegEncContext *s, DCTELEM block[6][64])
+{
+    int i,n, newq;
+    const int maxlevel= s->max_qcoeff;
+    const int minlevel= s->min_qcoeff;
+    int largest=0, smallest=0; /* signed extremes over all 6 blocks */
+
+    assert(s->adaptive_quant);
+
+    /* scan for the most positive and most negative coefficient levels */
+    for(n=0; n<6; n++){
+        if(s->mb_intra){
+            i=1; //skip clipping of intra dc
+            //FIXME requantize, note (mpeg1/h263/h263p-aic don't need it,...)
+        }else
+            i=0;
+
+        for(;i<=s->block_last_index[n]; i++){
+            const int j = s->intra_scantable.permutated[i];
+            int level = block[n][j];
+            if(largest < level) largest = level;
+            if(smallest > level) smallest= level;
+        }
+    }
+
+    /* pick the first quantizer at which both extremes fit the level range */
+    for(newq=s->qscale+1; newq<32; newq++){
+        if( ROUNDED_DIV(smallest*s->qscale, newq) >= minlevel
+         && ROUNDED_DIV(largest *s->qscale, newq) <= maxlevel)
+            break;
+    }
+
+    if(s->out_format==FMT_H263){
+        /* h263 like formats cannot change qscale by more than 2 easily */
+        if(s->avctx->qmin + 2 < newq)
+            newq= s->avctx->qmin + 2;
+    }
+
+    for(n=0; n<6; n++){
+        requantize_coeffs(s, block[n], s->qscale, newq, n);
+        clip_coeffs(s, block[n], s->block_last_index[n]);
+    }
+
+    s->dquant+= newq - s->qscale;
+    s->qscale= newq;
+}
+#if 0
+/* Disabled SAD-based variant: sum of absolute differences between
+ * vertically adjacent pixels over a 16x8 area (7 row pairs). The active
+ * squared-difference version lives in the #else branch below. */
+static int pix_vcmp16x8(UINT8 *s, int stride){ //FIXME move to dsputil & optimize
+    int score=0;
+    int x,y;
+
+    for(y=0; y<7; y++){
+        for(x=0; x<16; x+=4){
+            score+= ABS(s[x  ] - s[x  +stride]) + ABS(s[x+1] - s[x+1+stride])
+                   +ABS(s[x+2] - s[x+2+stride]) + ABS(s[x+3] - s[x+3+stride]);
+        }
+        s+= stride;
+    }
+
+    return score;
+}
+
+/* Disabled SAD-based variant: sum of absolute vertical differences of the
+ * difference image (s1 - s2) over a 16x8 area (7 row pairs). */
+static int pix_diff_vcmp16x8(UINT8 *s1, UINT8*s2, int stride){ //FIXME move to dsputil & optimize
+    int score=0;
+    int x,y;
+
+    for(y=0; y<7; y++){
+        for(x=0; x<16; x++){
+            score+= ABS(s1[x  ] - s2[x  ] - s1[x  +stride] + s2[x  +stride]);
+        }
+        s1+= stride;
+        s2+= stride;
+    }
+
+    return score;
+}
+#else
+#define SQ(a) ((a)*(a))
+
+/* Vertical activity measure: sum of squared differences between vertically
+ * adjacent pixels over a 16x8 area (7 row pairs). Used by the encoder to
+ * compare progressive vs interlaced scanning when deciding on the
+ * interlaced DCT (see the CODEC_FLAG_INTERLACED_DCT path in encode_mb). */
+static int pix_vcmp16x8(UINT8 *s, int stride){ //FIXME move to dsputil & optimize
+    int score=0;
+    int x,y;
+
+    for(y=0; y<7; y++){
+        for(x=0; x<16; x+=4){
+            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])
+                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);
+        }
+        s+= stride;
+    }
+
+    return score;
+}
+
+/* Vertical activity measure on a difference image: sum of squared vertical
+ * differences of (s1 - s2) over a 16x8 area (7 row pairs).
+ * NOTE(review): presumably used for the same interlaced-DCT decision on
+ * inter (residual) blocks — caller not visible here, confirm. */
+static int pix_diff_vcmp16x8(UINT8 *s1, UINT8*s2, int stride){ //FIXME move to dsputil & optimize
+    int score=0;
+    int x,y;
+
+    for(y=0; y<7; y++){
+        for(x=0; x<16; x++){
+            score+= SQ(s1[x  ] - s2[x  ] - s1[x  +stride] + s2[x  +stride]);
+        }
+        s1+= stride;
+        s2+= stride;
+    }
+
+    return score;
+}
+
+#endif
static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
{
const int mb_x= s->mb_x;
const int mb_y= s->mb_y;
int i;
int skip_dct[6];
-#if 0
- if (s->interlaced_dct) {
- dct_linesize = s->linesize * 2;
- dct_offset = s->linesize;
- } else {
- dct_linesize = s->linesize;
- dct_offset = s->linesize * 8;
- }
-#endif
+ int dct_offset = s->linesize*8; //default for progressive frames
+
for(i=0; i<6; i++) skip_dct[i]=0;
+
+ if(s->adaptive_quant){
+ s->dquant= s->qscale_table[mb_x + mb_y*s->mb_width] - s->qscale;
+
+ if(s->out_format==FMT_H263){
+ if (s->dquant> 2) s->dquant= 2;
+ else if(s->dquant<-2) s->dquant=-2;
+ }
+
+ if(s->codec_id==CODEC_ID_MPEG4){
+ if(!s->mb_intra){
+ assert(s->dquant==0 || s->mv_type!=MV_TYPE_8X8);
+
+ if(s->mv_dir&MV_DIRECT)
+ s->dquant=0;
+ }
+ }
+ s->qscale+= s->dquant;
+ s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
+ s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
+ }
if (s->mb_intra) {
UINT8 *ptr;
- int wrap;
+ int wrap_y;
int emu=0;
- wrap = s->linesize;
- ptr = s->new_picture[0] + (mb_y * 16 * wrap) + mb_x * 16;
+ wrap_y = s->linesize;
+ ptr = s->new_picture[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
+
if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
- emulated_edge_mc(s, ptr, wrap, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
+ emulated_edge_mc(s, ptr, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
ptr= s->edge_emu_buffer;
emu=1;
}
- get_pixels(s->block[0], ptr , wrap);
- get_pixels(s->block[1], ptr + 8, wrap);
- get_pixels(s->block[2], ptr + 8 * wrap , wrap);
- get_pixels(s->block[3], ptr + 8 * wrap + 8, wrap);
+
+ if(s->flags&CODEC_FLAG_INTERLACED_DCT){
+ int progressive_score, interlaced_score;
+
+ progressive_score= pix_vcmp16x8(ptr, wrap_y ) + pix_vcmp16x8(ptr + wrap_y*8, wrap_y );
+ interlaced_score = pix_vcmp16x8(ptr, wrap_y*2) + pix_vcmp16x8(ptr + wrap_y , wrap_y*2);
+
+ if(progressive_score > interlaced_score + 100){
+ s->interlaced_dct=1;
+
+ dct_offset= wrap_y;
+ wrap_y<<=1;
+ }else
+ s->interlaced_dct=0;
+ }
+
+ get_pixels(s->block[0], ptr , wrap_y);
+ get_pixels(s->block[1], ptr + 8, wrap_y);
+ get_pixels(s->block[2], ptr + dct_offset , wrap_y);
+ get_pixels(s->block[3], ptr + dct_offset + 8, wrap_y);
if(s->flags&CODEC_FLAG_GRAY){
skip_dct[4]= 1;
skip_dct[5]= 1;
}else{
- wrap >>=1;
- ptr = s->new_picture[1] + (mb_y * 8 * wrap) + mb_x * 8;
+ int wrap_c = s->uvlinesize;
+ ptr = s->new_picture[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
if(emu){
- emulated_edge_mc(s, ptr, wrap, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
+ emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
ptr= s->edge_emu_buffer;
}
- get_pixels(s->block[4], ptr, wrap);
+ get_pixels(s->block[4], ptr, wrap_c);
- ptr = s->new_picture[2] + (mb_y * 8 * wrap) + mb_x * 8;
+ ptr = s->new_picture[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
if(emu){
- emulated_edge_mc(s, ptr, wrap, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
+ emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
ptr= s->edge_emu_buffer;
}
- get_pixels(s->block[5], ptr, wrap);
+ get_pixels(s->block[5], ptr, wrap_c);
}
}else{
- op_pixels_func *op_pix;
- qpel_mc_func *op_qpix;
+ op_pixels_func (*op_pix)[4];
+ qpel_mc_func (*op_qpix)[16];
UINT8 *dest_y, *dest_cb, *dest_cr;
UINT8 *ptr_y, *ptr_cb, *ptr_cr;
int wrap_y, wrap_c;
dest_cb = s->current_picture[1] + (mb_y * 8 * (s->uvlinesize)) + mb_x * 8;
dest_cr = s->current_picture[2] + (mb_y * 8 * (s->uvlinesize)) + mb_x * 8;
wrap_y = s->linesize;
- wrap_c = wrap_y>>1;
+ wrap_c = s->uvlinesize;
ptr_y = s->new_picture[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
ptr_cb = s->new_picture[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
ptr_cr = s->new_picture[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
if ((!s->no_rounding) || s->pict_type==B_TYPE){
op_pix = put_pixels_tab;
- op_qpix= qpel_mc_rnd_tab;
+ op_qpix= put_qpel_pixels_tab;
}else{
op_pix = put_no_rnd_pixels_tab;
- op_qpix= qpel_mc_no_rnd_tab;
+ op_qpix= put_no_rnd_qpel_pixels_tab;
}
if (s->mv_dir & MV_DIR_FORWARD) {
MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture, op_pix, op_qpix);
- if ((!s->no_rounding) || s->pict_type==B_TYPE)
- op_pix = avg_pixels_tab;
- else
- op_pix = avg_no_rnd_pixels_tab;
+ op_pix = avg_pixels_tab;
+ op_qpix= avg_qpel_pixels_tab;
}
if (s->mv_dir & MV_DIR_BACKWARD) {
MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture, op_pix, op_qpix);
ptr_y= s->edge_emu_buffer;
emu=1;
}
+
+ if(s->flags&CODEC_FLAG_INTERLACED_DCT){
+ int progressive_score, interlaced_score;
+
+ progressive_score= pix_diff_vcmp16x8(ptr_y , dest_y , wrap_y )
+ + pix_diff_vcmp16x8(ptr_y + wrap_y*8, dest_y + wrap_y*8, wrap_y );
+ interlaced_score = pix_diff_vcmp16x8(ptr_y , dest_y , wrap_y*2)
+ + pix_diff_vcmp16x8(ptr_y + wrap_y , dest_y + wrap_y , wrap_y*2);
+
+ if(progressive_score > interlaced_score + 600){
+ s->interlaced_dct=1;
+
+ dct_offset= wrap_y;
+ wrap_y<<=1;
+ }else
+ s->interlaced_dct=0;
+ }
+
diff_pixels(s->block[0], ptr_y , dest_y , wrap_y);
diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
- diff_pixels(s->block[2], ptr_y + 8 * wrap_y , dest_y + 8 * wrap_y , wrap_y);
- diff_pixels(s->block[3], ptr_y + 8 * wrap_y + 8, dest_y + 8 * wrap_y + 8, wrap_y);
+ diff_pixels(s->block[2], ptr_y + dct_offset , dest_y + dct_offset , wrap_y);
+ diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
if(s->flags&CODEC_FLAG_GRAY){
skip_dct[4]= 1;
/* pre quantization */
if(s->mc_mb_var[s->mb_width*mb_y+ mb_x]<2*s->qscale*s->qscale){
+ //FIXME optimize
if(pix_abs8x8(ptr_y , dest_y , wrap_y) < 20*s->qscale) skip_dct[0]= 1;
if(pix_abs8x8(ptr_y + 8, dest_y + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1;
- if(pix_abs8x8(ptr_y + 8*wrap_y , dest_y + 8*wrap_y , wrap_y) < 20*s->qscale) skip_dct[2]= 1;
- if(pix_abs8x8(ptr_y + 8*wrap_y + 8, dest_y + 8*wrap_y + 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1;
+ if(pix_abs8x8(ptr_y +dct_offset , dest_y +dct_offset , wrap_y) < 20*s->qscale) skip_dct[2]= 1;
+ if(pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1;
if(pix_abs8x8(ptr_cb , dest_cb , wrap_y) < 20*s->qscale) skip_dct[4]= 1;
if(pix_abs8x8(ptr_cr , dest_cr , wrap_y) < 20*s->qscale) skip_dct[5]= 1;
#if 0
if(s->out_format==FMT_MJPEG){
for(i=0;i<6;i++) {
int overflow;
- s->block_last_index[i] = dct_quantize(s, s->block[i], i, 8, &overflow);
+ s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, 8, &overflow);
if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
}
}else{
for(i=0;i<6;i++) {
if(!skip_dct[i]){
int overflow;
- s->block_last_index[i] = dct_quantize(s, s->block[i], i, s->qscale, &overflow);
+ s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
// FIXME we could decide to change to quantizer instead of clipping
// JS: I don't think that would be a good idea it could lower quality instead
// of improve it. Just INTRADC clipping deserves changes in quantizer
}
if(s->luma_elim_threshold && !s->mb_intra)
for(i=0; i<4; i++)
- dct_single_coeff_elimination(s, i, s->luma_elim_threshold, 0);
+ dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
if(s->chroma_elim_threshold && !s->mb_intra)
for(i=4; i<6; i++)
- dct_single_coeff_elimination(s, i, s->chroma_elim_threshold, 1);
+ dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
}
if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
d->i_tex_bits= s->i_tex_bits;
d->p_tex_bits= s->p_tex_bits;
d->i_count= s->i_count;
- d->p_count= s->p_count;
+ d->f_count= s->f_count;
+ d->b_count= s->b_count;
d->skip_count= s->skip_count;
d->misc_bits= s->misc_bits;
d->last_bits= 0;
d->i_tex_bits= s->i_tex_bits;
d->p_tex_bits= s->p_tex_bits;
d->i_count= s->i_count;
- d->p_count= s->p_count;
+ d->f_count= s->f_count;
+ d->b_count= s->b_count;
d->skip_count= s->skip_count;
d->misc_bits= s->misc_bits;
if (s->h263_pred && !s->h263_msmpeg4)
ff_set_mpeg4_time(s, s->picture_number);
+ s->scene_change_score=0;
+
+ s->qscale= (int)(s->frame_qscale + 0.5); //FIXME qscale / ... stuff for ME rate distortion
+
/* Estimate motion for every MB */
if(s->pict_type != I_TYPE){
for(mb_y=0; mb_y < s->mb_height; mb_y++) {
memset(s->motion_val[0], 0, sizeof(INT16)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2);
memset(s->p_mv_table , 0, sizeof(INT16)*(s->mb_width+2)*(s->mb_height+2)*2);
memset(s->mb_type , MB_TYPE_INTRA, sizeof(UINT8)*s->mb_width*s->mb_height);
- }
+
+ if(!s->fixed_qscale){
+ /* finding spatial complexity for I-frame rate control */
+ for(mb_y=0; mb_y < s->mb_height; mb_y++) {
+ for(mb_x=0; mb_x < s->mb_width; mb_x++) {
+ int xx = mb_x * 16;
+ int yy = mb_y * 16;
+ uint8_t *pix = s->new_picture[0] + (yy * s->linesize) + xx;
+ int varc;
+ int sum = pix_sum(pix, s->linesize);
+
+ sum= (sum+8)>>4;
+ varc = (pix_norm1(pix, s->linesize) - sum*sum + 500 + 128)>>8;
- if(s->mb_var_sum < s->mc_mb_var_sum && s->pict_type == P_TYPE){ //FIXME subtract MV bits
+ s->mb_var [s->mb_width * mb_y + mb_x] = varc;
+ s->mb_mean[s->mb_width * mb_y + mb_x] = (sum+7)>>4;
+ s->mb_var_sum += varc;
+ }
+ }
+ }
+ }
+ if(s->scene_change_score > 0 && s->pict_type == P_TYPE){
s->pict_type= I_TYPE;
memset(s->mb_type , MB_TYPE_INTRA, sizeof(UINT8)*s->mb_width*s->mb_height);
if(s->max_b_frames==0){
s->input_pict_type= I_TYPE;
s->input_picture_in_gop_number=0;
}
-//printf("Scene change detected, encoding as I Frame\n");
+//printf("Scene change detected, encoding as I Frame %d %d\n", s->mb_var_sum, s->mc_mb_var_sum);
}
if(s->pict_type==P_TYPE || s->pict_type==S_TYPE)
ff_fix_long_b_mvs(s, s->b_bidir_back_mv_table, s->b_code, MB_TYPE_BIDIR);
}
-//printf("f_code %d ///\n", s->f_code);
-
-// printf("%d %d\n", s->avg_mb_var, s->mc_mb_var);
- if(s->flags&CODEC_FLAG_PASS2)
- s->qscale = ff_rate_estimate_qscale_pass2(s);
- else if (!s->fixed_qscale)
- s->qscale = ff_rate_estimate_qscale(s);
+ if (s->fixed_qscale)
+ s->frame_qscale = s->avctx->quality;
+ else
+ s->frame_qscale = ff_rate_estimate_qscale(s);
+
+ if(s->adaptive_quant){
+ switch(s->codec_id){
+ case CODEC_ID_MPEG4:
+ ff_clean_mpeg4_qscales(s);
+ break;
+ case CODEC_ID_H263:
+ case CODEC_ID_H263P:
+ ff_clean_h263_qscales(s);
+ break;
+ }
+ s->qscale= s->qscale_table[0];
+ }else
+ s->qscale= (int)(s->frame_qscale + 0.5);
+
if (s->out_format == FMT_MJPEG) {
/* for mjpeg, we do include qscale in the matrix */
s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
- for(i=1;i<64;i++)
- s->intra_matrix[i] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
- convert_matrix(s->q_intra_matrix, s->q_intra_matrix16,
- s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias);
+ for(i=1;i<64;i++){
+ int j= s->idct_permutation[i];
+
+ s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
+ }
+ convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
+ s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias, 8, 8);
}
s->last_bits= get_bit_count(&s->pb);
s->i_tex_bits=0;
s->p_tex_bits=0;
s->i_count=0;
- s->p_count=0;
+ s->f_count=0;
+ s->b_count=0;
s->skip_count=0;
/* init last dc values */
int max=0;
unsigned int threshold1, threshold2;
- av_fdct (block);
+ s->fdct (block);
+#ifndef ARCH_ALPHA /* Alpha uses unpermuted matrix */
/* we need this permutation so that we correct the IDCT
permutation. will be moved into DCT code */
- block_permute(block);
+ block_permute(block, s->idct_permutation); //FIXME remove
+#endif
if (s->mb_intra) {
if (!s->h263_aic) {
i = 1;
last_non_zero = 0;
qmat = s->q_intra_matrix[qscale];
- bias= s->intra_quant_bias<<(QMAT_SHIFT - 3 - QUANT_BIAS_SHIFT);
+ bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
} else {
i = 0;
last_non_zero = -1;
qmat = s->q_inter_matrix[qscale];
- bias= s->inter_quant_bias<<(QMAT_SHIFT - 3 - QUANT_BIAS_SHIFT);
+ bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
}
- threshold1= (1<<(QMAT_SHIFT - 3)) - bias - 1;
- threshold2= threshold1<<1;
+ threshold1= (1<<QMAT_SHIFT) - bias - 1;
+ threshold2= (threshold1<<1);
for(;i<64;i++) {
- j = zigzag_direct[i];
+ j = s->intra_scantable.permutated[i];
level = block[j];
level = level * qmat[j];
// || bias-level >= (1<<(QMAT_SHIFT - 3))){
if(((unsigned)(level+threshold1))>threshold2){
if(level>0){
- level= (bias + level)>>(QMAT_SHIFT - 3);
+ level= (bias + level)>>QMAT_SHIFT;
block[j]= level;
}else{
- level= (bias - level)>>(QMAT_SHIFT - 3);
+ level= (bias - level)>>QMAT_SHIFT;
block[j]= -level;
}
max |=level;
int i, level, nCoeffs;
const UINT16 *quant_matrix;
- if(s->alternate_scan) nCoeffs= 64;
- else nCoeffs= s->block_last_index[n]+1;
+ nCoeffs= s->block_last_index[n];
if (s->mb_intra) {
if (n < 4)
block[0] = block[0] * s->c_dc_scale;
/* XXX: only mpeg1 */
quant_matrix = s->intra_matrix;
- for(i=1;i<nCoeffs;i++) {
- int j= zigzag_direct[i];
+ for(i=1;i<=nCoeffs;i++) {
+ int j= s->intra_scantable.permutated[i];
level = block[j];
if (level) {
if (level < 0) {
} else {
i = 0;
quant_matrix = s->inter_matrix;
- for(;i<nCoeffs;i++) {
- int j= zigzag_direct[i];
+ for(;i<=nCoeffs;i++) {
+ int j= s->intra_scantable.permutated[i];
level = block[j];
if (level) {
if (level < 0) {
int i, level, nCoeffs;
const UINT16 *quant_matrix;
- if(s->alternate_scan) nCoeffs= 64;
- else nCoeffs= s->block_last_index[n]+1;
+ if(s->alternate_scan) nCoeffs= 63;
+ else nCoeffs= s->block_last_index[n];
if (s->mb_intra) {
if (n < 4)
else
block[0] = block[0] * s->c_dc_scale;
quant_matrix = s->intra_matrix;
- for(i=1;i<nCoeffs;i++) {
- int j= zigzag_direct[i];
+ for(i=1;i<=nCoeffs;i++) {
+ int j= s->intra_scantable.permutated[i];
level = block[j];
if (level) {
if (level < 0) {
int sum=-1;
i = 0;
quant_matrix = s->inter_matrix;
- for(;i<nCoeffs;i++) {
- int j= zigzag_direct[i];
+ for(;i<=nCoeffs;i++) {
+ int j= s->intra_scantable.permutated[i];
level = block[j];
if (level) {
if (level < 0) {
int i, level, qmul, qadd;
int nCoeffs;
+ assert(s->block_last_index[n]>=0);
+
+ qadd = (qscale - 1) | 1;
+ qmul = qscale << 1;
+
if (s->mb_intra) {
if (!s->h263_aic) {
if (n < 4)
block[0] = block[0] * s->y_dc_scale;
else
block[0] = block[0] * s->c_dc_scale;
- }
+ }else
+ qadd = 0;
i = 1;
- nCoeffs= 64; //does not allways use zigzag table
+ nCoeffs= 63; //does not always use the zigzag table
} else {
i = 0;
- nCoeffs= zigzag_end[ s->block_last_index[n] ];
+ nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
}
- qmul = s->qscale << 1;
- if (s->h263_aic && s->mb_intra)
- qadd = 0;
- else
- qadd = (s->qscale - 1) | 1;
-
- for(;i<nCoeffs;i++) {
+ for(;i<=nCoeffs;i++) {
level = block[i];
if (level) {
if (level < 0) {
int i, intra_count=0, inter_count=0;
int intra_conceal= s->msmpeg4_version ? 50 : 50; //FIXME finetune
int inter_conceal= s->msmpeg4_version ? 50 : 50;
-
+
// for last block
if(mb_x>=s->mb_width) mb_x= s->mb_width -1;
if(mb_y>=s->mb_height) mb_y= s->mb_height-1;