fastdiv patch by (BERO <bero at geocities dot co dot jp>) with fixes & cleanup by me

[ffmpeg] / libavcodec / mpegvideo.c
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c

index 26398172656544731be24c40251634ce5c360e56..c0dd96bbece57c0564850f0bf67b0e0e22a2ae65 100644 (file)
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -82,7 +82,7 @@ static const uint8_t h263_chroma_roundtab[16] = {
  };
  
  #ifdef CONFIG_ENCODERS
-static uint16_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
+static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
  static uint8_t default_fcode_tab[MAX_MV*2+1];
  
  enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
@@ -136,16 +136,6 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16
  }
  #endif //CONFIG_ENCODERS
  
-// move into common.c perhaps 
-#define CHECKED_ALLOCZ(p, size)\
-{\
-    p= av_mallocz(size);\
-    if(p==NULL){\
-        perror("malloc");\
-        goto fail;\
-    }\
-}
-
  void ff_init_scantable(MpegEncContext *s, ScanTable *st, const uint8_t *src_scantable){
      int i;
      int end;
@@ -227,6 +217,9 @@ int DCT_common_init(MpegEncContext *s)
   * The pixels are allocated/set by calling get_buffer() if shared=0
   */
  static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
+    const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
+    const int mb_array_size= s->mb_stride*s->mb_height;
+    int i;
      
      if(shared){
          assert(pic->data[0]);
@@ -260,17 +253,25 @@ static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
      
      if(pic->qscale_table==NULL){
          if (s->encoding) {        
-            CHECKED_ALLOCZ(pic->mb_var   , s->mb_num * sizeof(int16_t))
-            CHECKED_ALLOCZ(pic->mc_mb_var, s->mb_num * sizeof(int16_t))
-            CHECKED_ALLOCZ(pic->mb_mean  , s->mb_num * sizeof(int8_t))
-            CHECKED_ALLOCZ(pic->mb_cmp_score, s->mb_num * sizeof(int32_t))
+            CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
+            CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
+            CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
+            CHECKED_ALLOCZ(pic->mb_cmp_score, mb_array_size * sizeof(int32_t))
          }
  
-        CHECKED_ALLOCZ(pic->mbskip_table , s->mb_num * sizeof(uint8_t)+1) //the +1 is for the slice end check
-        CHECKED_ALLOCZ(pic->qscale_table , s->mb_num * sizeof(uint8_t))
-        pic->qstride= s->mb_width;
+        CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
+        CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
+        CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(int))
+        pic->mb_type= pic->mb_type_base + s->mb_stride+1;
+        if(s->out_format == FMT_H264){
+            for(i=0; i<2; i++){
+                CHECKED_ALLOCZ(pic->motion_val[i], 2 * 16 * s->mb_num * sizeof(uint16_t))
+                CHECKED_ALLOCZ(pic->ref_index[i] , 4 * s->mb_num * sizeof(uint8_t))
+            }
+        }
+        pic->qstride= s->mb_stride;
      }
-    
+
      //it might be nicer if the application would keep track of these but it would require a API change
      memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
      s->prev_pict_types[0]= s->pict_type;
@@ -298,15 +299,14 @@ static void free_picture(MpegEncContext *s, Picture *pic){
      av_freep(&pic->mb_cmp_score);
      av_freep(&pic->mbskip_table);
      av_freep(&pic->qscale_table);
+    av_freep(&pic->mb_type_base);
+    pic->mb_type= NULL;
+    for(i=0; i<2; i++){
+        av_freep(&pic->motion_val[i]);
+        av_freep(&pic->ref_index[i]);
+    }
      
-    if(pic->type == FF_BUFFER_TYPE_INTERNAL){
-        for(i=0; i<4; i++){
-            av_freep(&pic->base[i]);
-            pic->data[i]= NULL;
-        }
-        av_freep(&pic->opaque);
-        pic->type= 0;
-    }else if(pic->type == FF_BUFFER_TYPE_SHARED){
+    if(pic->type == FF_BUFFER_TYPE_SHARED){
          for(i=0; i<4; i++){
              pic->base[i]=
              pic->data[i]= NULL;
@@ -318,7 +318,7 @@ static void free_picture(MpegEncContext *s, Picture *pic){
  /* init common structure for both encoder and decoder */
  int MPV_common_init(MpegEncContext *s)
  {
-    int y_size, c_size, yc_size, i;
+    int y_size, c_size, yc_size, i, mb_array_size, x, y;
  
      dsputil_init(&s->dsp, s->avctx);
      DCT_common_init(s);
@@ -327,12 +327,21 @@ int MPV_common_init(MpegEncContext *s)
  
      s->mb_width  = (s->width  + 15) / 16;
      s->mb_height = (s->height + 15) / 16;
+    s->mb_stride = s->mb_width + 1;
+    mb_array_size= s->mb_height * s->mb_stride;
  
      /* set default edge pos, will be overriden in decode_header if needed */
      s->h_edge_pos= s->mb_width*16;
      s->v_edge_pos= s->mb_height*16;
  
      s->mb_num = s->mb_width * s->mb_height;
+    
+    s->block_wrap[0]=
+    s->block_wrap[1]=
+    s->block_wrap[2]=
+    s->block_wrap[3]= s->mb_width*2 + 2;
+    s->block_wrap[4]=
+    s->block_wrap[5]= s->mb_width + 2;
  
      y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
      c_size = (s->mb_width + 2) * (s->mb_height + 2);
@@ -349,16 +358,30 @@ int MPV_common_init(MpegEncContext *s)
  
      s->avctx->coded_frame= (AVFrame*)&s->current_picture;
  
+    CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
+    for(y=0; y<s->mb_height; y++){
+        for(x=0; x<s->mb_width; x++){
+            s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
+        }
+    }
+    s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
+    
      if (s->encoding) {
-        int mv_table_size= (s->mb_width+2)*(s->mb_height+2);
+        int mv_table_size= s->mb_stride * (s->mb_height+2) + 1;
  
          /* Allocate MV tables */
-        CHECKED_ALLOCZ(s->p_mv_table            , mv_table_size * 2 * sizeof(int16_t))
-        CHECKED_ALLOCZ(s->b_forw_mv_table       , mv_table_size * 2 * sizeof(int16_t))
-        CHECKED_ALLOCZ(s->b_back_mv_table       , mv_table_size * 2 * sizeof(int16_t))
-        CHECKED_ALLOCZ(s->b_bidir_forw_mv_table , mv_table_size * 2 * sizeof(int16_t))
-        CHECKED_ALLOCZ(s->b_bidir_back_mv_table , mv_table_size * 2 * sizeof(int16_t))
-        CHECKED_ALLOCZ(s->b_direct_mv_table     , mv_table_size * 2 * sizeof(int16_t))
+        CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
+        CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
+        CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
+        CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
+        CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
+        CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
+        s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
+        s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
+        s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
+        s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
+        s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
+        s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
  
          //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
          CHECKED_ALLOCZ(s->me.scratchpad,  s->width*2*16*3*sizeof(uint8_t)) 
@@ -375,14 +398,15 @@ int MPV_common_init(MpegEncContext *s)
              CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
          }
          CHECKED_ALLOCZ(s->avctx->stats_out, 256);
+
+        /* Allocate MB type table */
+        CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint8_t)) //needed for encoding
      }
          
-    CHECKED_ALLOCZ(s->error_status_table, s->mb_num*sizeof(uint8_t))
+    CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
      
      if (s->out_format == FMT_H263 || s->encoding) {
          int size;
-        /* Allocate MB type table */
-        CHECKED_ALLOCZ(s->mb_type  , s->mb_num * sizeof(uint8_t))
  
          /* MV prediction */
          size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
@@ -391,12 +415,9 @@ int MPV_common_init(MpegEncContext *s)
  
      if(s->codec_id==CODEC_ID_MPEG4){
          /* interlaced direct mode decoding tables */
-        CHECKED_ALLOCZ(s->field_mv_table, s->mb_num*2*2 * sizeof(int16_t))
-        CHECKED_ALLOCZ(s->field_select_table, s->mb_num*2* sizeof(int8_t))
+        CHECKED_ALLOCZ(s->field_mv_table, mb_array_size*2*2 * sizeof(int16_t))
+        CHECKED_ALLOCZ(s->field_select_table, mb_array_size*2* sizeof(int8_t))
      }
-    /* 4mv b frame decoding table */
-    //note this is needed for h263 without b frames too (segfault on damaged streams otherwise)
-    CHECKED_ALLOCZ(s->co_located_type_table, s->mb_num * sizeof(uint8_t))
      if (s->out_format == FMT_H263) {
          /* ac values */
          CHECKED_ALLOCZ(s->ac_val[0], yc_size * sizeof(int16_t) * 16);
@@ -410,8 +431,8 @@ int MPV_common_init(MpegEncContext *s)
          CHECKED_ALLOCZ(s->bitstream_buffer, BITSTREAM_BUFFER_SIZE);
  
          /* cbp, ac_pred, pred_dir */
-        CHECKED_ALLOCZ(s->cbp_table  , s->mb_num * sizeof(uint8_t))
-        CHECKED_ALLOCZ(s->pred_dir_table, s->mb_num * sizeof(uint8_t))
+        CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
+        CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
      }
      
      if (s->h263_pred || s->h263_plus || !s->encoding) {
@@ -425,14 +446,14 @@ int MPV_common_init(MpegEncContext *s)
      }
  
      /* which mb is a intra block */
-    CHECKED_ALLOCZ(s->mbintra_table, s->mb_num);
-    memset(s->mbintra_table, 1, s->mb_num);
+    CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
+    memset(s->mbintra_table, 1, mb_array_size);
      
      /* default structure is frame */
      s->picture_structure = PICT_FRAME;
      
      /* init macroblock skip table */
-    CHECKED_ALLOCZ(s->mbskip_table, s->mb_num+1);
+    CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
      //Note the +1 is for a quicker mpeg4 slice_end detection
      CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
      
@@ -456,12 +477,19 @@ void MPV_common_end(MpegEncContext *s)
      int i;
  
      av_freep(&s->mb_type);
-    av_freep(&s->p_mv_table);
-    av_freep(&s->b_forw_mv_table);
-    av_freep(&s->b_back_mv_table);
-    av_freep(&s->b_bidir_forw_mv_table);
-    av_freep(&s->b_bidir_back_mv_table);
-    av_freep(&s->b_direct_mv_table);
+    av_freep(&s->p_mv_table_base);
+    av_freep(&s->b_forw_mv_table_base);
+    av_freep(&s->b_back_mv_table_base);
+    av_freep(&s->b_bidir_forw_mv_table_base);
+    av_freep(&s->b_bidir_back_mv_table_base);
+    av_freep(&s->b_direct_mv_table_base);
+    s->p_mv_table= NULL;
+    s->b_forw_mv_table= NULL;
+    s->b_back_mv_table= NULL;
+    s->b_bidir_forw_mv_table= NULL;
+    s->b_bidir_back_mv_table= NULL;
+    s->b_direct_mv_table= NULL;
+    
      av_freep(&s->motion_val);
      av_freep(&s->dc_val[0]);
      av_freep(&s->ac_val[0]);
@@ -479,16 +507,17 @@ void MPV_common_end(MpegEncContext *s)
      av_freep(&s->tex_pb_buffer);
      av_freep(&s->pb2_buffer);
      av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
-    av_freep(&s->co_located_type_table);
      av_freep(&s->field_mv_table);
      av_freep(&s->field_select_table);
      av_freep(&s->avctx->stats_out);
      av_freep(&s->ac_stats);
      av_freep(&s->error_status_table);
+    av_freep(&s->mb_index2xy);
  
      for(i=0; i<MAX_PICTURE_COUNT; i++){
          free_picture(s, &s->picture[i]);
      }
+    avcodec_default_free_buffers(s->avctx);
      s->context_initialized = 0;
  }
  
@@ -504,7 +533,6 @@ int MPV_encode_init(AVCodecContext *avctx)
  
      s->bit_rate = avctx->bit_rate;
      s->bit_rate_tolerance = avctx->bit_rate_tolerance;
-    s->frame_rate = avctx->frame_rate;
      s->width = avctx->width;
      s->height = avctx->height;
      if(avctx->gop_size > 600){
@@ -516,8 +544,6 @@ int MPV_encode_init(AVCodecContext *avctx)
      s->rtp_payload_size = avctx->rtp_payload_size;
      if (avctx->rtp_callback)
          s->rtp_callback = avctx->rtp_callback;
-    s->qmin= avctx->qmin;
-    s->qmax= avctx->qmax;
      s->max_qdiff= avctx->max_qdiff;
      s->qcompress= avctx->qcompress;
      s->qblur= avctx->qblur;
@@ -554,10 +580,52 @@ int MPV_encode_init(AVCodecContext *avctx)
      
      s->progressive_sequence= !(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
  
+    if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4){
+        fprintf(stderr, "4MV not supporetd by codec\n");
+        return -1;
+    }
+    
+    if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
+        fprintf(stderr, "qpel not supporetd by codec\n");
+        return -1;
+    }
+
+    if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
+        fprintf(stderr, "data partitioning not supporetd by codec\n");
+        return -1;
+    }
+    
+    if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO){
+        fprintf(stderr, "b frames not supporetd by codec\n");
+        return -1;
+    }
+    
+    if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
+        fprintf(stderr, "mpeg2 style quantization not supporetd by codec\n");
+        return -1;
+    }
+        
+    if(s->codec_id==CODEC_ID_MJPEG){
+        s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
+        s->inter_quant_bias= 0;
+    }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO){
+        s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
+        s->inter_quant_bias= 0;
+    }else{
+        s->intra_quant_bias=0;
+        s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
+    }
+    
+    if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
+        s->intra_quant_bias= avctx->intra_quant_bias;
+    if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
+        s->inter_quant_bias= avctx->inter_quant_bias;
+    
      switch(avctx->codec->id) {
      case CODEC_ID_MPEG1VIDEO:
          s->out_format = FMT_MPEG1;
-        avctx->delay=0; //FIXME not sure, should check the spec
+        s->low_delay= 0; //s->max_b_frames ? 0 : 1;
+        avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
          break;
      case CODEC_ID_MJPEG:
          s->out_format = FMT_MJPEG;
@@ -634,6 +702,7 @@ int MPV_encode_init(AVCodecContext *avctx)
          s->h263_pred = 1;
          s->unrestricted_mv = 1;
          s->msmpeg4_version= 3;
+        s->flipflop_rounding=1;
          avctx->delay=0;
          s->low_delay=1;
          break;
@@ -643,6 +712,7 @@ int MPV_encode_init(AVCodecContext *avctx)
          s->h263_pred = 1;
          s->unrestricted_mv = 1;
          s->msmpeg4_version= 4;
+        s->flipflop_rounding=1;
          avctx->delay=0;
          s->low_delay=1;
          break;
@@ -652,6 +722,7 @@ int MPV_encode_init(AVCodecContext *avctx)
          s->h263_pred = 1;
          s->unrestricted_mv = 1;
          s->msmpeg4_version= 5;
+        s->flipflop_rounding=1;
          avctx->delay=0;
          s->low_delay=1;
          break;
@@ -666,8 +737,8 @@ int MPV_encode_init(AVCodecContext *avctx)
              int i;
              done=1;
  
-            default_mv_penalty= av_mallocz( sizeof(uint16_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
-            memset(default_mv_penalty, 0, sizeof(uint16_t)*(MAX_FCODE+1)*(2*MAX_MV+1));
+            default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
+            memset(default_mv_penalty, 0, sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1));
              memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));
  
              for(i=-16; i<16; i++){
@@ -838,7 +909,7 @@ static int find_unused_picture(MpegEncContext *s, int shared){
          }
      }else{
          for(i=0; i<MAX_PICTURE_COUNT; i++){
-            if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) break;
+            if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) break; //FIXME
          }
          for(i=0; i<MAX_PICTURE_COUNT; i++){
              if(s->picture[i].data[0]==NULL) break;
@@ -856,24 +927,18 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
      AVFrame *pic;
  
      s->mb_skiped = 0;
-    
+
+    assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
+
      /* mark&release old frames */
-    if (s->pict_type != B_TYPE && s->last_picture.data[0]) {
-        for(i=0; i<MAX_PICTURE_COUNT; i++){
-//printf("%8X %d %d %X %X\n", s->picture[i].data[0], s->picture[i].type, i, s->next_picture.data[0], s->last_picture.data[0]);
-            if(s->picture[i].data[0] == s->last_picture.data[0]){
-//                s->picture[i].reference=0;
-                avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
-                break;
-            }    
-        }
-        assert(i<MAX_PICTURE_COUNT);
+    if (s->pict_type != B_TYPE && s->last_picture_ptr) {
+        avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
  
          /* release forgotten pictures */
          /* if(mpeg124/h263) */
          if(!s->encoding){
              for(i=0; i<MAX_PICTURE_COUNT; i++){
-                if(s->picture[i].data[0] && s->picture[i].data[0] != s->next_picture.data[0] && s->picture[i].reference){
+                if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
                      fprintf(stderr, "releasing zombie picture\n");
                      avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);                
                  }
@@ -882,27 +947,60 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
      }
  alloc:
      if(!s->encoding){
+        /* release non refernce frames */
+        for(i=0; i<MAX_PICTURE_COUNT; i++){
+            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
+                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
+            }
+        }
+
          i= find_unused_picture(s, 0);
      
          pic= (AVFrame*)&s->picture[i];
-        pic->reference= s->pict_type != B_TYPE;
-        pic->coded_picture_number= s->current_picture.coded_picture_number+1;
+        pic->reference= s->pict_type != B_TYPE ? 3 : 0;
+
+        if(s->current_picture_ptr)
+            pic->coded_picture_number= s->current_picture_ptr->coded_picture_number+1;
          
          alloc_picture(s, (Picture*)pic, 0);
  
-        s->current_picture= s->picture[i];
+        s->current_picture_ptr= &s->picture[i];
      }
  
+    s->current_picture_ptr->pict_type= s->pict_type;
+    s->current_picture_ptr->quality= s->qscale;
+    s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
+
+    s->current_picture= *s->current_picture_ptr;
+  
+  if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
      if (s->pict_type != B_TYPE) {
-        s->last_picture= s->next_picture;
-        s->next_picture= s->current_picture;
+        s->last_picture_ptr= s->next_picture_ptr;
+        s->next_picture_ptr= s->current_picture_ptr;
      }
      
-    if(s->pict_type != I_TYPE && s->last_picture.data[0]==NULL){
+    if(s->last_picture_ptr) s->last_picture= *s->last_picture_ptr;
+    if(s->next_picture_ptr) s->next_picture= *s->next_picture_ptr;
+    if(s->new_picture_ptr ) s->new_picture = *s->new_picture_ptr;
+    
+    if(s->picture_structure!=PICT_FRAME){
+        int i;
+        for(i=0; i<4; i++){
+            if(s->picture_structure == PICT_BOTTOM_FIELD){
+                 s->current_picture.data[i] += s->current_picture.linesize[i];
+            } 
+            s->current_picture.linesize[i] *= 2;
+            s->last_picture.linesize[i] *=2;
+            s->next_picture.linesize[i] *=2;
+        }
+    }
+    
+    if(s->pict_type != I_TYPE && s->last_picture_ptr==NULL){
          fprintf(stderr, "warning: first frame is no keyframe\n");
          assert(s->pict_type != B_TYPE); //these should have been dropped if we dont have a reference
          goto alloc;
      }
+  }
     
      s->hurry_up= s->avctx->hurry_up;
      s->error_resilience= avctx->error_resilience;
@@ -938,12 +1036,8 @@ void MPV_frame_end(MpegEncContext *s)
      if(s->pict_type!=B_TYPE){
          s->last_non_b_pict_type= s->pict_type;
      }
-    
-    s->current_picture.quality= s->qscale; //FIXME get average of qscale_table
-    s->current_picture.pict_type= s->pict_type;
-    s->current_picture.key_frame= s->pict_type == I_TYPE;
-    
-    /* copy back current_picture variables */
+#if 0
+        /* copy back current_picture variables */
      for(i=0; i<MAX_PICTURE_COUNT; i++){
          if(s->picture[i].data[0] == s->current_picture.data[0]){
              s->picture[i]= s->current_picture;
@@ -951,23 +1045,97 @@ void MPV_frame_end(MpegEncContext *s)
          }    
      }
      assert(i<MAX_PICTURE_COUNT);
+#endif    
  
-    /* release non refernce frames */
-    for(i=0; i<MAX_PICTURE_COUNT; i++){
-        if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/)
-            s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
+    if(s->encoding){
+        /* release non refernce frames */
+        for(i=0; i<MAX_PICTURE_COUNT; i++){
+            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
+                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
+            }
+        }
      }
-    if(s->avctx->debug&FF_DEBUG_SKIP){
-        int x,y;        
+    // clear copies, to avoid confusion
+#if 0
+    memset(&s->last_picture, 0, sizeof(Picture));
+    memset(&s->next_picture, 0, sizeof(Picture));
+    memset(&s->current_picture, 0, sizeof(Picture));
+#endif
+}
+
+/**
+ * prints debuging info for the given picture.
+ */
+void ff_print_debug_info(MpegEncContext *s, Picture *pict){
+
+    if(!pict || !pict->mb_type) return;
+
+    if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
+        int x,y;
+
          for(y=0; y<s->mb_height; y++){
              for(x=0; x<s->mb_width; x++){
-                int count= s->mbskip_table[x + y*s->mb_width];
-                if(count>9) count=9;
-                printf(" %1d", count);
+                if(s->avctx->debug&FF_DEBUG_SKIP){
+                    int count= s->mbskip_table[x + y*s->mb_stride];
+                    if(count>9) count=9;
+                    printf("%1d", count);
+                }
+                if(s->avctx->debug&FF_DEBUG_QP){
+                    printf("%2d", pict->qscale_table[x + y*s->mb_stride]);
+                }
+                if(s->avctx->debug&FF_DEBUG_MB_TYPE){
+                    int mb_type= pict->mb_type[x + y*s->mb_stride];
+                    
+                    //Type & MV direction
+                    if(IS_PCM(mb_type))
+                        printf("P");
+                    else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
+                        printf("A");
+                    else if(IS_INTRA4x4(mb_type))
+                        printf("i");
+                    else if(IS_INTRA16x16(mb_type))
+                        printf("I");
+                    else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
+                        printf("d");
+                    else if(IS_DIRECT(mb_type))
+                        printf("D");
+                    else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
+                        printf("g");
+                    else if(IS_GMC(mb_type))
+                        printf("G");
+                    else if(IS_SKIP(mb_type))
+                        printf("S");
+                    else if(!USES_LIST(mb_type, 1))
+                        printf(">");
+                    else if(!USES_LIST(mb_type, 0))
+                        printf("<");
+                    else{
+                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
+                        printf("X");
+                    }
+                    
+                    //segmentation
+                    if(IS_8X8(mb_type))
+                        printf("+");
+                    else if(IS_16X8(mb_type))
+                        printf("-");
+                    else if(IS_8X16(mb_type))
+                        printf("¦");
+                    else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
+                        printf(" ");
+                    else
+                        printf("?");
+                    
+                        
+                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
+                        printf("=");
+                    else
+                        printf(" ");
+                }
+//                printf(" ");
              }
              printf("\n");
          }
-        printf("pict type: %d\n", s->pict_type);
      }
  }
  
@@ -1024,7 +1192,7 @@ static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
          i= find_unused_picture(s, 1);
  
          pic= (AVFrame*)&s->picture[i];
-        pic->reference= 1;
+        pic->reference= 3;
      
          for(i=0; i<4; i++){
              pic->data[i]= pic_arg->data[i];
@@ -1035,9 +1203,15 @@ static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
          i= find_unused_picture(s, 0);
  
          pic= (AVFrame*)&s->picture[i];
-        pic->reference= 1;
+        pic->reference= 3;
  
          alloc_picture(s, (Picture*)pic, 0);
+        for(i=0; i<4; i++){
+            /* the input will be 16 pixels to the right relative to the actual buffer start
+             * and the current_pic, so the buffer can be reused, yes its not beatifull 
+             */
+            pic->data[i]+= 16; 
+        }
  
          if(   pic->data[0] == pic_arg->data[0] 
             && pic->data[1] == pic_arg->data[1]
@@ -1100,7 +1274,7 @@ static void select_input_picture(MpegEncContext *s){
  
      /* set next picture types & ordering */
      if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
-        if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture.data[0]==NULL || s->intra_only){
+        if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
              s->reordered_input_picture[0]= s->input_picture[0];
              s->reordered_input_picture[0]->pict_type= I_TYPE;
              s->reordered_input_picture[0]->coded_picture_number= coded_pic_num;
@@ -1174,19 +1348,22 @@ static void select_input_picture(MpegEncContext *s){
      }
      
      if(s->reordered_input_picture[0]){
-       s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE;
+        s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
+
+        s->new_picture= *s->reordered_input_picture[0];
  
          if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
+            // input is a shared pix, so we cant modifiy it -> alloc a new one & ensure that the shared one is reuseable
+        
              int i= find_unused_picture(s, 0);
              Picture *pic= &s->picture[i];
  
-            s->new_picture= *s->reordered_input_picture[0];
-
              /* mark us unused / free shared pic */
              for(i=0; i<4; i++)
                  s->reordered_input_picture[0]->data[i]= NULL;
              s->reordered_input_picture[0]->type= 0;
              
+            //FIXME bad, copy * except
              pic->pict_type = s->reordered_input_picture[0]->pict_type;
              pic->quality   = s->reordered_input_picture[0]->quality;
              pic->coded_picture_number = s->reordered_input_picture[0]->coded_picture_number;
@@ -1194,18 +1371,20 @@ static void select_input_picture(MpegEncContext *s){
              
              alloc_picture(s, pic, 0);
  
-            s->current_picture= *pic;
+            s->current_picture_ptr= pic;
          }else{
+            // input is not a shared pix -> reuse buffer for current_pix
+
              assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER 
                     || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
              
-            s->new_picture= *s->reordered_input_picture[0];
-
+            s->current_picture_ptr= s->reordered_input_picture[0];
              for(i=0; i<4; i++){
-                s->reordered_input_picture[0]->data[i]-=16; //FIXME dirty
+                //reverse the +16 we did before storing the input
+                s->current_picture_ptr->data[i]-=16;
              }
-            s->current_picture= *s->reordered_input_picture[0];
          }
+        s->current_picture= *s->current_picture_ptr;
      
          s->picture_number= s->new_picture.display_picture_number;
  //printf("dpn:%d\n", s->picture_number);
@@ -1260,6 +1439,10 @@ int MPV_encode_picture(AVCodecContext *avctx,
          
          if(s->flags&CODEC_FLAG_PASS1)
              ff_write_pass1_stats(s);
+
+        for(i=0; i<4; i++){
+            avctx->error[i] += s->current_picture_ptr->error[i];
+        }
      }
  
      s->input_picture_number++;
@@ -1269,10 +1452,6 @@ int MPV_encode_picture(AVCodecContext *avctx,
      
      s->total_bits += s->frame_bits;
      avctx->frame_bits  = s->frame_bits;
-
-    for(i=0; i<4; i++){
-        avctx->error[i] += s->current_picture.error[i];
-    }
      
      return pbBufPtr(&s->pb) - s->pb.buf;
  }
@@ -1523,15 +1702,15 @@ if(s->quarter_sample)
      src_y = clip(src_y, -16, height);
      if (src_y == height)
          dxy &= ~2;
-    linesize   = s->linesize << field_based;
-    uvlinesize = s->uvlinesize << field_based;
+    linesize   = s->current_picture.linesize[0] << field_based;
+    uvlinesize = s->current_picture.linesize[1] << field_based;
      ptr = ref_picture[0] + (src_y * linesize) + (src_x) + src_offset;
      dest_y += dest_offset;
  
      if(s->flags&CODEC_FLAG_EMU_EDGE){
          if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 16 > s->h_edge_pos
                                || src_y + (motion_y&1) + h  > v_edge_pos){
-            ff_emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based, 
+            ff_emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based,  //FIXME linesize? and uv below
                               src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
              ptr= s->edge_emu_buffer + src_offset;
              emu=1;
@@ -1690,6 +1869,18 @@ inline int ff_h263_round_chroma(int x){
      }
  }
  
+/**
+ * motion compesation of a single macroblock
+ * @param s context
+ * @param dest_y luma destination pointer
+ * @param dest_cb chroma cb/u destination pointer
+ * @param dest_cr chroma cr/v destination pointer
+ * @param dir direction (0->forward, 1->backward)
+ * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
+ * @param pic_op halfpel motion compensation function (average or put normally)
+ * @param pic_op qpel motion compensation function (average or put normally)
+ * the motion vectors are taken from s->mv and the MV type from s->mv_type
+ */
  static inline void MPV_motion(MpegEncContext *s, 
                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                                int dir, uint8_t **ref_picture, 
@@ -1864,10 +2055,10 @@ static inline void MPV_motion(MpegEncContext *s,
          } else {
              int offset;
              if(s->picture_structure == s->field_select[dir][0] + 1 || s->pict_type == B_TYPE || s->first_field){
-                offset= s->field_select[dir][0] ? s->linesize/2 : 0;
+                offset= s->field_select[dir][0] ? s->linesize : 0;
              }else{
                  ref_picture= s->current_picture.data;
-                offset= s->field_select[dir][0] ? s->linesize/2 : -s->linesize/2; 
+                offset= s->field_select[dir][0] ? s->linesize : -s->linesize; 
              } 
  
              mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
@@ -1937,7 +2128,7 @@ void ff_clean_intra_table_entries(MpegEncContext *s)
      memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
      memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
      
-    s->mbintra_table[s->mb_x + s->mb_y*s->mb_width]= 0;
+    s->mbintra_table[s->mb_x + s->mb_y*s->mb_stride]= 0;
  }
  
  /* generic function called after a macroblock has been parsed by the
@@ -1953,7 +2144,7 @@ void ff_clean_intra_table_entries(MpegEncContext *s)
  void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
  {
      int mb_x, mb_y;
-    const int mb_xy = s->mb_y * s->mb_width + s->mb_x;
+    const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
  
      mb_x = s->mb_x;
      mb_y = s->mb_y;
@@ -1979,33 +2170,26 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
          //FIXME a lot of thet is only needed for !low_delay
          const int wrap = s->block_wrap[0];
          const int xy = s->block_index[0];
-        const int mb_index= s->mb_x + s->mb_y*s->mb_width;
-        if(s->mv_type == MV_TYPE_8X8){
-            s->co_located_type_table[mb_index]= CO_LOCATED_TYPE_4MV;
-        } else {
+        if(s->mv_type != MV_TYPE_8X8){
              int motion_x, motion_y;
              if (s->mb_intra) {
                  motion_x = 0;
                  motion_y = 0;
-                if(s->co_located_type_table)
-                    s->co_located_type_table[mb_index]= 0;
              } else if (s->mv_type == MV_TYPE_16X16) {
                  motion_x = s->mv[0][0][0];
                  motion_y = s->mv[0][0][1];
-                if(s->co_located_type_table)
-                    s->co_located_type_table[mb_index]= 0;
              } else /*if (s->mv_type == MV_TYPE_FIELD)*/ {
                  int i;
                  motion_x = s->mv[0][0][0] + s->mv[0][1][0];
                  motion_y = s->mv[0][0][1] + s->mv[0][1][1];
                  motion_x = (motion_x>>1) | (motion_x&1);
                  for(i=0; i<2; i++){
-                    s->field_mv_table[mb_index][i][0]= s->mv[0][i][0];
-                    s->field_mv_table[mb_index][i][1]= s->mv[0][i][1];
-                    s->field_select_table[mb_index][i]= s->field_select[0][i];
+                    s->field_mv_table[mb_xy][i][0]= s->mv[0][i][0];
+                    s->field_mv_table[mb_xy][i][1]= s->mv[0][i][1];
+                    s->field_select_table[mb_xy][i]= s->field_select[0][i];
                  }
-                s->co_located_type_table[mb_index]= CO_LOCATED_TYPE_FIELDMV;
              }
+            
              /* no update if 8X8 because it has been done during parsing */
              s->motion_val[xy][0] = motion_x;
              s->motion_val[xy][1] = motion_y;
@@ -2016,6 +2200,13 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
              s->motion_val[xy + 1 + wrap][0] = motion_x;
              s->motion_val[xy + 1 + wrap][1] = motion_y;
          }
+
+        if(s->encoding){ //FIXME encoding MUST be cleaned up
+            if (s->mv_type == MV_TYPE_8X8) 
+                s->current_picture.mb_type[mb_xy]= MB_TYPE_L0 | MB_TYPE_8x8;
+            else
+                s->current_picture.mb_type[mb_xy]= MB_TYPE_L0 | MB_TYPE_16x16;
+        }
      }
      
      if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
@@ -2023,6 +2214,8 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
          int dct_linesize, dct_offset;
          op_pixels_func (*op_pix)[4];
          qpel_mc_func (*op_qpix)[16];
+        const int linesize= s->current_picture.linesize[0]; //not s->linesize as this woulnd be wrong for field pics
+        const int uvlinesize= s->current_picture.linesize[1];
  
          /* avoid copy if macroblock skipped in last frame too */
          if (s->pict_type != B_TYPE) {
@@ -2061,17 +2254,17 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
              dest_cb = s->current_picture.data[1] + mb_x * 8;
              dest_cr = s->current_picture.data[2] + mb_x * 8;
          }else{
-            dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
-            dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
-            dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
+            dest_y  = s->current_picture.data[0] + (mb_y * 16* linesize  ) + mb_x * 16;
+            dest_cb = s->current_picture.data[1] + (mb_y * 8 * uvlinesize) + mb_x * 8;
+            dest_cr = s->current_picture.data[2] + (mb_y * 8 * uvlinesize) + mb_x * 8;
          }
  
          if (s->interlaced_dct) {
-            dct_linesize = s->linesize * 2;
-            dct_offset = s->linesize;
+            dct_linesize = linesize * 2;
+            dct_offset = linesize;
          } else {
-            dct_linesize = s->linesize;
-            dct_offset = s->linesize * 8;
+            dct_linesize = linesize;
+            dct_offset = linesize * 8;
          }
  
          if (!s->mb_intra) {
@@ -2108,8 +2301,8 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
                  add_dequant_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
  
                  if(!(s->flags&CODEC_FLAG_GRAY)){
-                    add_dequant_dct(s, block[4], 4, dest_cb, s->uvlinesize);
-                    add_dequant_dct(s, block[5], 5, dest_cr, s->uvlinesize);
+                    add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize);
+                    add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize);
                  }
              } else if(s->codec_id != CODEC_ID_WMV2){
                  add_dct(s, block[0], 0, dest_y, dct_linesize);
@@ -2118,8 +2311,8 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
                  add_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
  
                  if(!(s->flags&CODEC_FLAG_GRAY)){
-                    add_dct(s, block[4], 4, dest_cb, s->uvlinesize);
-                    add_dct(s, block[5], 5, dest_cr, s->uvlinesize);
+                    add_dct(s, block[4], 4, dest_cb, uvlinesize);
+                    add_dct(s, block[5], 5, dest_cr, uvlinesize);
                  }
              } 
  #ifdef CONFIG_RISKY
@@ -2136,8 +2329,8 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
                  put_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
  
                  if(!(s->flags&CODEC_FLAG_GRAY)){
-                    put_dct(s, block[4], 4, dest_cb, s->uvlinesize);
-                    put_dct(s, block[5], 5, dest_cr, s->uvlinesize);
+                    put_dct(s, block[4], 4, dest_cb, uvlinesize);
+                    put_dct(s, block[5], 5, dest_cr, uvlinesize);
                  }
              }else{
                  s->dsp.idct_put(dest_y                 , dct_linesize, block[0]);
@@ -2146,8 +2339,8 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
                  s->dsp.idct_put(dest_y + dct_offset + 8, dct_linesize, block[3]);
  
                  if(!(s->flags&CODEC_FLAG_GRAY)){
-                    s->dsp.idct_put(dest_cb, s->uvlinesize, block[4]);
-                    s->dsp.idct_put(dest_cr, s->uvlinesize, block[5]);
+                    s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
+                    s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
                  }
              }
          }
@@ -2300,7 +2493,7 @@ static int pix_diff_vcmp16x8(uint8_t *s1, uint8_t*s2, int stride){ //FIXME move
   */
  void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
      if (    s->avctx->draw_horiz_band 
-        && (s->last_picture.data[0] || s->low_delay) ) {
+        && (s->last_picture_ptr || s->low_delay) ) {
          uint8_t *src_ptr[3];
          int offset;
          h= FFMIN(h, s->height - y);
@@ -2339,7 +2532,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
      for(i=0; i<6; i++) skip_dct[i]=0;
      
      if(s->adaptive_quant){
-        s->dquant= s->current_picture.qscale_table[mb_x + mb_y*s->mb_width] - s->qscale;
+        s->dquant= s->current_picture.qscale_table[mb_x + mb_y*s->mb_stride] - s->qscale;
  
          if(s->out_format==FMT_H263){
              if     (s->dquant> 2) s->dquant= 2;
@@ -2348,10 +2541,10 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
              
          if(s->codec_id==CODEC_ID_MPEG4){        
              if(!s->mb_intra){
-                assert(s->dquant==0 || s->mv_type!=MV_TYPE_8X8);
-
                  if(s->mv_dir&MV_DIRECT)
                      s->dquant=0;
+
+                assert(s->dquant==0 || s->mv_type!=MV_TYPE_8X8);
              }
          }
          s->qscale+= s->dquant;
@@ -2490,7 +2683,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
              s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
          }
          /* pre quantization */         
-        if(s->current_picture.mc_mb_var[s->mb_width*mb_y+ mb_x]<2*s->qscale*s->qscale){
+        if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
              //FIXME optimize
             if(s->dsp.pix_abs8x8(ptr_y               , dest_y               , wrap_y) < 20*s->qscale) skip_dct[0]= 1;
              if(s->dsp.pix_abs8x8(ptr_y            + 8, dest_y            + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1;
@@ -2521,13 +2714,13 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
              {
                  float adap_parm;
                  
-                adap_parm = ((s->avg_mb_var << 1) + s->mb_var[s->mb_width*mb_y+mb_x] + 1.0) /
-                            ((s->mb_var[s->mb_width*mb_y+mb_x] << 1) + s->avg_mb_var + 1.0);
+                adap_parm = ((s->avg_mb_var << 1) + s->mb_var[s->mb_stride*mb_y+mb_x] + 1.0) /
+                            ((s->mb_var[s->mb_stride*mb_y+mb_x] << 1) + s->avg_mb_var + 1.0);
              
                  printf("\ntype=%c qscale=%2d adap=%0.2f dquant=%4.2f var=%4d avgvar=%4d", 
-                        (s->mb_type[s->mb_width*mb_y+mb_x] > 0) ? 'I' : 'P', 
+                        (s->mb_type[s->mb_stride*mb_y+mb_x] > 0) ? 'I' : 'P', 
                          s->qscale, adap_parm, s->qscale*adap_parm,
-                        s->mb_var[s->mb_width*mb_y+mb_x], s->avg_mb_var);
+                        s->mb_var[s->mb_stride*mb_y+mb_x], s->avg_mb_var);
              }
  #endif
      /* DCT & quantize */
@@ -2597,17 +2790,33 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
   */
  int ff_combine_frame( MpegEncContext *s, int next, uint8_t **buf, int *buf_size){
      ParseContext *pc= &s->parse_context;
-        
+
+#if 0
+    if(pc->overread){
+        printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
+        printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
+    }
+#endif
+
+    /* copy overreaded byes from last frame into buffer */
+    for(; pc->overread>0; pc->overread--){
+        pc->buffer[pc->index++]= pc->buffer[pc->overread_index++];
+    }
+    
      pc->last_index= pc->index;
  
-    if(next==-1){
+    /* copy into buffer end return */
+    if(next == END_NOT_FOUND){
          pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, (*buf_size) + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
  
          memcpy(&pc->buffer[pc->index], *buf, *buf_size);
          pc->index += *buf_size;
          return -1;
      }
-
+    
+    pc->overread_index= pc->index + next;
+    
+    /* append to buffer */
      if(pc->index){
          pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, next + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
  
@@ -2617,6 +2826,19 @@ int ff_combine_frame( MpegEncContext *s, int next, uint8_t **buf, int *buf_size)
          *buf_size= pc->last_index + next;
      }
  
+    /* store overread bytes */
+    for(;next < 0; next++){
+        pc->state = (pc->state<<8) | pc->buffer[pc->last_index + next];
+        pc->overread++;
+    }
+
+#if 0
+    if(pc->overread){
+        printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
+        printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
+    }
+#endif
+
      return 0;
  }
  
@@ -2639,7 +2861,7 @@ static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext
      memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
  
      /* mpeg1 */
-    d->mb_incr= s->mb_incr;
+    d->mb_skip_run= s->mb_skip_run;
      for(i=0; i<3; i++)
          d->last_dc[i]= s->last_dc[i];
      
@@ -2665,7 +2887,7 @@ static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *
      memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
      
      /* mpeg1 */
-    d->mb_incr= s->mb_incr;
+    d->mb_skip_run= s->mb_skip_run;
      for(i=0; i<3; i++)
          d->last_dc[i]= s->last_dc[i];
      
@@ -2765,13 +2987,6 @@ static void encode_picture(MpegEncContext *s, int picture_number)
      }
  
      s->picture_number = picture_number;
-
-    s->block_wrap[0]=
-    s->block_wrap[1]=
-    s->block_wrap[2]=
-    s->block_wrap[3]= s->mb_width*2 + 2;
-    s->block_wrap[4]=
-    s->block_wrap[5]= s->mb_width + 2;
      
      /* Reset the average MB variance */
      s->current_picture.mb_var_sum = 0;
@@ -2789,8 +3004,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
      s->qscale= (int)(s->frame_qscale + 0.5); //FIXME qscale / ... stuff for ME ratedistoration
      
      if(s->pict_type==I_TYPE){
-        if(s->msmpeg4_version) s->no_rounding=1;
-        else                   s->no_rounding=0;
+        if(s->msmpeg4_version >= 3) s->no_rounding=1;
+        else                        s->no_rounding=0;
      }else if(s->pict_type!=B_TYPE){
          if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
              s->no_rounding ^= 1;          
@@ -2840,8 +3055,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
          /* I-Frame */
          //FIXME do we need to zero them?
          memset(s->motion_val[0], 0, sizeof(int16_t)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2);
-        memset(s->p_mv_table   , 0, sizeof(int16_t)*(s->mb_width+2)*(s->mb_height+2)*2);
-        memset(s->mb_type      , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_width*s->mb_height);
+        memset(s->p_mv_table   , 0, sizeof(int16_t)*(s->mb_stride)*s->mb_height*2);
+        memset(s->mb_type      , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height);
          
          if(!s->fixed_qscale){
              /* finding spatial complexity for I-frame rate control */
@@ -2855,8 +3070,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
      
                     varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
  
-                    s->current_picture.mb_var [s->mb_width * mb_y + mb_x] = varc;
-                    s->current_picture.mb_mean[s->mb_width * mb_y + mb_x] = (sum+128)>>8;
+                    s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
+                    s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
                      s->current_picture.mb_var_sum    += varc;
                  }
              }
@@ -2866,7 +3081,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
  
      if(s->scene_change_score > 0 && s->pict_type == P_TYPE){
          s->pict_type= I_TYPE;
-        memset(s->mb_type   , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_width*s->mb_height);
+        memset(s->mb_type   , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height);
  //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
      }
  
@@ -2976,9 +3191,9 @@ static void encode_picture(MpegEncContext *s, int picture_number)
          /* note: quant matrix value (8) is implied here */
          s->last_dc[i] = 128;
          
-        s->current_picture.error[i] = 0;
+        s->current_picture_ptr->error[i] = 0;
      }
-    s->mb_incr = 1;
+    s->mb_skip_run = 0;
      s->last_mv[0][0][0] = 0;
      s->last_mv[0][0][1] = 0;
      s->last_mv[1][0][0] = 0;
@@ -2998,7 +3213,6 @@ static void encode_picture(MpegEncContext *s, int picture_number)
      s->resync_mb_y=0;
      s->first_slice_line = 1;
      s->ptr_lastgob = s->pb.buf;
-    s->ptr_last_mb_line = s->pb.buf;
      for(mb_y=0; mb_y < s->mb_height; mb_y++) {
          s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
          s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
@@ -3010,8 +3224,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
          s->block_index[4]= s->block_wrap[4]*(mb_y + 1)                    + s->block_wrap[0]*(s->mb_height*2 + 2);
          s->block_index[5]= s->block_wrap[4]*(mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2);
          for(mb_x=0; mb_x < s->mb_width; mb_x++) {
-            int mb_type= s->mb_type[mb_y * s->mb_width + mb_x];
-            const int xy= (mb_y+1) * (s->mb_width+2) + mb_x + 1;
+            const int xy= mb_y*s->mb_stride + mb_x;
+            int mb_type= s->mb_type[xy];
  //            int d;
              int dmin=10000000;
  
@@ -3033,7 +3247,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                  is_gob_start=0;
                  
                  if(s->codec_id==CODEC_ID_MPEG4){
-                    if(current_packet_size + s->mb_line_avgsize/s->mb_width >= s->rtp_payload_size
+                    if(current_packet_size >= s->rtp_payload_size
                         && s->mb_y + s->mb_x>0){
  
                          if(s->partitioned_frame){
@@ -3050,8 +3264,15 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                          ff_mpeg4_clean_buffers(s);
                          is_gob_start=1;
                      }
+                }else if(s->codec_id==CODEC_ID_MPEG1VIDEO){
+                    if(   current_packet_size >= s->rtp_payload_size 
+                       && s->mb_y + s->mb_x>0 && s->mb_skip_run==0){
+                        ff_mpeg1_encode_slice_header(s);
+                        ff_mpeg1_clean_buffers(s);
+                        is_gob_start=1;
+                    }
                  }else{
-                    if(current_packet_size + s->mb_line_avgsize*s->gob_index >= s->rtp_payload_size
+                    if(current_packet_size >= s->rtp_payload_size
                         && s->mb_x==0 && s->mb_y>0 && s->mb_y%s->gob_index==0){
                         
                          h263_encode_gob_header(s, mb_y);                       
@@ -3157,7 +3378,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                                   &dmin, &next_block, 0, 0);
                      /* force cleaning of ac/dc pred stuff if needed ... */
                      if(s->h263_pred || s->h263_aic)
-                        s->mbintra_table[mb_x + mb_y*s->mb_width]=1;
+                        s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
                  }
                  copy_context_after_encode(s, &best_s, -1);
                  
@@ -3181,16 +3402,16 @@ static void encode_picture(MpegEncContext *s, int picture_number)
              } else {
                  int motion_x, motion_y;
                  int intra_score;
-                int inter_score= s->current_picture.mb_cmp_score[mb_x + mb_y*s->mb_width];
+                int inter_score= s->current_picture.mb_cmp_score[mb_x + mb_y*s->mb_stride];
                  
                if(!(s->flags&CODEC_FLAG_HQ) && s->pict_type==P_TYPE){
                  /* get luma score */
                  if((s->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
-                    intra_score= (s->current_picture.mb_var[mb_x + mb_y*s->mb_width]<<8) - 500; //FIXME dont scale it down so we dont have to fix it
+                    intra_score= (s->current_picture.mb_var[mb_x + mb_y*s->mb_stride]<<8) - 500; //FIXME dont scale it down so we dont have to fix it
                  }else{
                      uint8_t *dest_y;
  
-                    int mean= s->current_picture.mb_mean[mb_x + mb_y*s->mb_width]; //FIXME
+                    int mean= s->current_picture.mb_mean[mb_x + mb_y*s->mb_stride]; //FIXME
                      mean*= 0x01010101;
                      
                      dest_y  = s->new_picture.data[0] + (mb_y * 16 * s->linesize    ) + mb_x * 16;
@@ -3206,8 +3427,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                      intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, dest_y, s->linesize);
                                          
  /*                    printf("intra:%7d inter:%7d var:%7d mc_var.%7d\n", intra_score>>8, inter_score>>8, 
-                        s->current_picture.mb_var[mb_x + mb_y*s->mb_width],
-                        s->current_picture.mc_mb_var[mb_x + mb_y*s->mb_width]);*/
+                        s->current_picture.mb_var[mb_x + mb_y*s->mb_stride],
+                        s->current_picture.mc_mb_var[mb_x + mb_y*s->mb_stride]);*/
                  }
                  
                  /* get chroma score */
@@ -3348,34 +3569,23 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                  if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
                  if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
  
-                s->current_picture.error[0] += sse(
+                s->current_picture_ptr->error[0] += sse(
                      s,
                      s->new_picture    .data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
                      s->current_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
                      w, h, s->linesize);
-                s->current_picture.error[1] += sse(
+                s->current_picture_ptr->error[1] += sse(
                      s,
                      s->new_picture    .data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
                      s->current_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
                      w>>1, h>>1, s->uvlinesize);
-                s->current_picture.error[2] += sse(
+                s->current_picture_ptr->error[2] += sse(
                      s,
                      s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
                      s->current_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
                      w>>1, h>>1, s->uvlinesize);
              }
-//printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_width, get_bit_count(&s->pb));
-        }
-
-
-        /* Obtain average mb_row size for RTP */
-        if (s->rtp_mode) {
-            if (mb_y==0)
-                s->mb_line_avgsize = pbBufPtr(&s->pb) - s->ptr_last_mb_line;
-            else {    
-                s->mb_line_avgsize = (s->mb_line_avgsize + pbBufPtr(&s->pb) - s->ptr_last_mb_line) >> 1;
-            }
-            s->ptr_last_mb_line = pbBufPtr(&s->pb);
+//printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, get_bit_count(&s->pb));
          }
      }
      emms_c();
@@ -3698,8 +3908,8 @@ static int dct_quantize_c(MpegEncContext *s,
          level = block[j];
          level = level * qmat[j];
  
-//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
-//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
+//        if(   bias+level >= (1<<QMAT_SHIFT)
+//           || bias-level >= (1<<QMAT_SHIFT)){
          if(((unsigned)(level+threshold1))>threshold2){
              if(level>0){
                  level= (bias + level)>>QMAT_SHIFT;
@@ -3901,6 +4111,8 @@ char ff_get_pict_type_char(int pict_type){
      case P_TYPE: return 'P'; 
      case B_TYPE: return 'B'; 
      case S_TYPE: return 'S'; 
+    case SI_TYPE:return 'i'; 
+    case SP_TYPE:return 'p'; 
      default:     return '?';
      }
  }
@@ -3920,6 +4132,58 @@ static const AVOption mpeg4_options[] =
                        rc_min_rate, 4, 24000000, 0),
      AVOPTION_CODEC_INT("rc_maxrate", "rate control maximum bitrate",
                        rc_max_rate, 4, 24000000, 0),
+    AVOPTION_CODEC_DOUBLE("rc_buf_aggresivity", "rate control buffer aggresivity",
+                         rc_buffer_aggressivity, 4, 24000000, 0),
+    AVOPTION_CODEC_DOUBLE("rc_initial_cplx", "initial complexity for pass1 ratecontrol",
+                         rc_initial_cplx, 0., 9999999., 0),
+    AVOPTION_CODEC_DOUBLE("i_quant_factor", "qscale factor between p and i frames",
+                         i_quant_factor, 0., 0., 0),
+    AVOPTION_CODEC_DOUBLE("i_quant_offset", "qscale offset between p and i frames",
+                         i_quant_factor, -999999., 999999., 0),
+    AVOPTION_CODEC_INT("dct_algo", "dct alghorithm",
+                      dct_algo, 0, 5, 0), // fixme - "Auto,FastInt,Int,MMX,MLib,Altivec"
+    AVOPTION_CODEC_DOUBLE("lumi_masking", "luminance masking",
+                         lumi_masking, 0., 999999., 0),
+    AVOPTION_CODEC_DOUBLE("temporal_cplx_masking", "temporary complexity masking",
+                         temporal_cplx_masking, 0., 999999., 0),
+    AVOPTION_CODEC_DOUBLE("spatial_cplx_masking", "spatial complexity masking",
+                         spatial_cplx_masking, 0., 999999., 0),
+    AVOPTION_CODEC_DOUBLE("p_masking", "p block masking",
+                         p_masking, 0., 999999., 0),
+    AVOPTION_CODEC_DOUBLE("dark_masking", "darkness masking",
+                         dark_masking, 0., 999999., 0),
+    AVOPTION_CODEC_INT("idct_algo", "idct alghorithm",
+                      idct_algo, 0, 8, 0), // fixme - "Auto,Int,Simple,SimpleMMX,LibMPEG2MMX,PS2,MLib,ARM,Altivec"
+
+    AVOPTION_CODEC_INT("mb_qmin", "minimum MB quantizer",
+                      mb_qmin, 0, 8, 0),
+    AVOPTION_CODEC_INT("mb_qmax", "maximum MB quantizer",
+                      mb_qmin, 0, 8, 0),
+
+    AVOPTION_CODEC_INT("me_cmp", "ME compare function",
+                      me_cmp, 0, 24000000, 0),
+    AVOPTION_CODEC_INT("me_sub_cmp", "subpixel ME compare function",
+                      me_sub_cmp, 0, 24000000, 0),
+
+
+    AVOPTION_CODEC_INT("dia_size", "ME diamond size & shape",
+                      dia_size, 0, 24000000, 0),
+    AVOPTION_CODEC_INT("last_predictor_count", "amount of previous MV predictors",
+                      last_predictor_count, 0, 24000000, 0),
+
+    AVOPTION_CODEC_INT("pre_me", "pre pass for ME",
+                      pre_me, 0, 24000000, 0),
+    AVOPTION_CODEC_INT("me_pre_cmp", "ME pre pass compare function",
+                      me_pre_cmp, 0, 24000000, 0),
+
+    AVOPTION_CODEC_INT("me_range", "maximum ME search range",
+                      me_range, 0, 24000000, 0),
+    AVOPTION_CODEC_INT("pre_dia_size", "ME pre pass diamod size & shape",
+                      pre_dia_size, 0, 24000000, 0),
+    AVOPTION_CODEC_INT("me_subpel_quality", "subpel ME quality",
+                      me_subpel_quality, 0, 24000000, 0),
+    AVOPTION_CODEC_INT("me_range", "maximum ME search range",
+                      me_range, 0, 24000000, 0),
      AVOPTION_CODEC_FLAG("psnr", "calculate PSNR of compressed frames",
                         flags, CODEC_FLAG_PSNR, 0),
      AVOPTION_CODEC_RCOVERRIDE("rc_override", "ratecontrol override (=startframe,endframe,qscale,quality_factor)",
@@ -3991,6 +4255,7 @@ AVCodec msmpeg4v1_encoder = {
      MPV_encode_init,
      MPV_encode_picture,
      MPV_encode_end,
+    .options = mpeg4_options,
  };
  
  AVCodec msmpeg4v2_encoder = {
@@ -4001,6 +4266,7 @@ AVCodec msmpeg4v2_encoder = {
      MPV_encode_init,
      MPV_encode_picture,
      MPV_encode_end,
+    .options = mpeg4_options,
  };
  
  AVCodec msmpeg4v3_encoder = {
@@ -4011,6 +4277,7 @@ AVCodec msmpeg4v3_encoder = {
      MPV_encode_init,
      MPV_encode_picture,
      MPV_encode_end,
+    .options = mpeg4_options,
  };
  
  AVCodec wmv1_encoder = {
@@ -4021,6 +4288,7 @@ AVCodec wmv1_encoder = {
      MPV_encode_init,
      MPV_encode_picture,
      MPV_encode_end,
+    .options = mpeg4_options,
  };
  
  #endif