flash video (flv) support patch by (Garrick Meeker <gmeeker at theoryllc dot com>)

[ffmpeg] / libavcodec / mpegvideo.c
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c

index da99327dcdee3fe3f6d8b72afbb945173c8cecb9..8b9caaf802bc97d390b79a6c1d8e8824ce680917 100644 (file)
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -82,7 +82,7 @@ static const uint8_t h263_chroma_roundtab[16] = {
  };
  
  #ifdef CONFIG_ENCODERS
-static uint16_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
+static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
  static uint8_t default_fcode_tab[MAX_MV*2+1];
  
  enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
@@ -136,17 +136,7 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16
  }
  #endif //CONFIG_ENCODERS
  
-// move into common.c perhaps 
-#define CHECKED_ALLOCZ(p, size)\
-{\
-    p= av_mallocz(size);\
-    if(p==NULL){\
-        perror("malloc");\
-        goto fail;\
-    }\
-}
-
-void ff_init_scantable(MpegEncContext *s, ScanTable *st, const uint8_t *src_scantable){
+void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
      int i;
      int end;
      
@@ -155,7 +145,7 @@ void ff_init_scantable(MpegEncContext *s, ScanTable *st, const uint8_t *src_scan
      for(i=0; i<64; i++){
          int j;
          j = src_scantable[i];
-        st->permutated[i] = s->dsp.idct_permutation[j];
+        st->permutated[i] = permutation[j];
  #ifdef ARCH_POWERPC
          st->inverse[j] = i;
  #endif
@@ -212,10 +202,10 @@ int DCT_common_init(MpegEncContext *s)
      /* load & permutate scantables
         note: only wmv uses differnt ones 
      */
-    ff_init_scantable(s, &s->inter_scantable  , ff_zigzag_direct);
-    ff_init_scantable(s, &s->intra_scantable  , ff_zigzag_direct);
-    ff_init_scantable(s, &s->intra_h_scantable, ff_alternate_horizontal_scan);
-    ff_init_scantable(s, &s->intra_v_scantable, ff_alternate_vertical_scan);
+    ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
+    ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
+    ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
+    ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
  
      s->picture_structure= PICT_FRAME;
      
@@ -227,6 +217,9 @@ int DCT_common_init(MpegEncContext *s)
   * The pixels are allocated/set by calling get_buffer() if shared=0
   */
  static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
+    const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
+    const int mb_array_size= s->mb_stride*s->mb_height;
+    int i;
      
      if(shared){
          assert(pic->data[0]);
@@ -260,17 +253,25 @@ static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
      
      if(pic->qscale_table==NULL){
          if (s->encoding) {        
-            CHECKED_ALLOCZ(pic->mb_var   , s->mb_num * sizeof(int16_t))
-            CHECKED_ALLOCZ(pic->mc_mb_var, s->mb_num * sizeof(int16_t))
-            CHECKED_ALLOCZ(pic->mb_mean  , s->mb_num * sizeof(int8_t))
-            CHECKED_ALLOCZ(pic->mb_cmp_score, s->mb_num * sizeof(int32_t))
+            CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
+            CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
+            CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
+            CHECKED_ALLOCZ(pic->mb_cmp_score, mb_array_size * sizeof(int32_t))
          }
  
-        CHECKED_ALLOCZ(pic->mbskip_table , s->mb_num * sizeof(uint8_t)+1) //the +1 is for the slice end check
-        CHECKED_ALLOCZ(pic->qscale_table , s->mb_num * sizeof(uint8_t))
-        pic->qstride= s->mb_width;
+        CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
+        CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
+        CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(int))
+        pic->mb_type= pic->mb_type_base + s->mb_stride+1;
+        if(s->out_format == FMT_H264){
+            for(i=0; i<2; i++){
+                CHECKED_ALLOCZ(pic->motion_val[i], 2 * 16 * s->mb_num * sizeof(uint16_t))
+                CHECKED_ALLOCZ(pic->ref_index[i] , 4 * s->mb_num * sizeof(uint8_t))
+            }
+        }
+        pic->qstride= s->mb_stride;
      }
-    
+
      //it might be nicer if the application would keep track of these but it would require a API change
      memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
      s->prev_pict_types[0]= s->pict_type;
@@ -298,15 +299,14 @@ static void free_picture(MpegEncContext *s, Picture *pic){
      av_freep(&pic->mb_cmp_score);
      av_freep(&pic->mbskip_table);
      av_freep(&pic->qscale_table);
+    av_freep(&pic->mb_type_base);
+    pic->mb_type= NULL;
+    for(i=0; i<2; i++){
+        av_freep(&pic->motion_val[i]);
+        av_freep(&pic->ref_index[i]);
+    }
      
-    if(pic->type == FF_BUFFER_TYPE_INTERNAL){
-        for(i=0; i<4; i++){
-            av_freep(&pic->base[i]);
-            pic->data[i]= NULL;
-        }
-        av_freep(&pic->opaque);
-        pic->type= 0;
-    }else if(pic->type == FF_BUFFER_TYPE_SHARED){
+    if(pic->type == FF_BUFFER_TYPE_SHARED){
          for(i=0; i<4; i++){
              pic->base[i]=
              pic->data[i]= NULL;
@@ -318,7 +318,7 @@ static void free_picture(MpegEncContext *s, Picture *pic){
  /* init common structure for both encoder and decoder */
  int MPV_common_init(MpegEncContext *s)
  {
-    int y_size, c_size, yc_size, i;
+    int y_size, c_size, yc_size, i, mb_array_size, x, y;
  
      dsputil_init(&s->dsp, s->avctx);
      DCT_common_init(s);
@@ -327,12 +327,21 @@ int MPV_common_init(MpegEncContext *s)
  
      s->mb_width  = (s->width  + 15) / 16;
      s->mb_height = (s->height + 15) / 16;
+    s->mb_stride = s->mb_width + 1;
+    mb_array_size= s->mb_height * s->mb_stride;
  
      /* set default edge pos, will be overriden in decode_header if needed */
      s->h_edge_pos= s->mb_width*16;
      s->v_edge_pos= s->mb_height*16;
  
      s->mb_num = s->mb_width * s->mb_height;
+    
+    s->block_wrap[0]=
+    s->block_wrap[1]=
+    s->block_wrap[2]=
+    s->block_wrap[3]= s->mb_width*2 + 2;
+    s->block_wrap[4]=
+    s->block_wrap[5]= s->mb_width + 2;
  
      y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
      c_size = (s->mb_width + 2) * (s->mb_height + 2);
@@ -349,16 +358,30 @@ int MPV_common_init(MpegEncContext *s)
  
      s->avctx->coded_frame= (AVFrame*)&s->current_picture;
  
+    CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
+    for(y=0; y<s->mb_height; y++){
+        for(x=0; x<s->mb_width; x++){
+            s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
+        }
+    }
+    s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
+    
      if (s->encoding) {
-        int mv_table_size= (s->mb_width+2)*(s->mb_height+2);
+        int mv_table_size= s->mb_stride * (s->mb_height+2) + 1;
  
          /* Allocate MV tables */
-        CHECKED_ALLOCZ(s->p_mv_table            , mv_table_size * 2 * sizeof(int16_t))
-        CHECKED_ALLOCZ(s->b_forw_mv_table       , mv_table_size * 2 * sizeof(int16_t))
-        CHECKED_ALLOCZ(s->b_back_mv_table       , mv_table_size * 2 * sizeof(int16_t))
-        CHECKED_ALLOCZ(s->b_bidir_forw_mv_table , mv_table_size * 2 * sizeof(int16_t))
-        CHECKED_ALLOCZ(s->b_bidir_back_mv_table , mv_table_size * 2 * sizeof(int16_t))
-        CHECKED_ALLOCZ(s->b_direct_mv_table     , mv_table_size * 2 * sizeof(int16_t))
+        CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
+        CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
+        CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
+        CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
+        CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
+        CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
+        s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
+        s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
+        s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
+        s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
+        s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
+        s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
  
          //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
          CHECKED_ALLOCZ(s->me.scratchpad,  s->width*2*16*3*sizeof(uint8_t)) 
@@ -375,14 +398,15 @@ int MPV_common_init(MpegEncContext *s)
              CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
          }
          CHECKED_ALLOCZ(s->avctx->stats_out, 256);
+
+        /* Allocate MB type table */
+        CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint8_t)) //needed for encoding
      }
          
-    CHECKED_ALLOCZ(s->error_status_table, s->mb_num*sizeof(uint8_t))
+    CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
      
      if (s->out_format == FMT_H263 || s->encoding) {
          int size;
-        /* Allocate MB type table */
-        CHECKED_ALLOCZ(s->mb_type  , s->mb_num * sizeof(uint8_t))
  
          /* MV prediction */
          size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
@@ -391,12 +415,9 @@ int MPV_common_init(MpegEncContext *s)
  
      if(s->codec_id==CODEC_ID_MPEG4){
          /* interlaced direct mode decoding tables */
-        CHECKED_ALLOCZ(s->field_mv_table, s->mb_num*2*2 * sizeof(int16_t))
-        CHECKED_ALLOCZ(s->field_select_table, s->mb_num*2* sizeof(int8_t))
+        CHECKED_ALLOCZ(s->field_mv_table, mb_array_size*2*2 * sizeof(int16_t))
+        CHECKED_ALLOCZ(s->field_select_table, mb_array_size*2* sizeof(int8_t))
      }
-    /* 4mv b frame decoding table */
-    //note this is needed for h263 without b frames too (segfault on damaged streams otherwise)
-    CHECKED_ALLOCZ(s->co_located_type_table, s->mb_num * sizeof(uint8_t))
      if (s->out_format == FMT_H263) {
          /* ac values */
          CHECKED_ALLOCZ(s->ac_val[0], yc_size * sizeof(int16_t) * 16);
@@ -410,8 +431,8 @@ int MPV_common_init(MpegEncContext *s)
          CHECKED_ALLOCZ(s->bitstream_buffer, BITSTREAM_BUFFER_SIZE);
  
          /* cbp, ac_pred, pred_dir */
-        CHECKED_ALLOCZ(s->cbp_table  , s->mb_num * sizeof(uint8_t))
-        CHECKED_ALLOCZ(s->pred_dir_table, s->mb_num * sizeof(uint8_t))
+        CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
+        CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
      }
      
      if (s->h263_pred || s->h263_plus || !s->encoding) {
@@ -425,14 +446,14 @@ int MPV_common_init(MpegEncContext *s)
      }
  
      /* which mb is a intra block */
-    CHECKED_ALLOCZ(s->mbintra_table, s->mb_num);
-    memset(s->mbintra_table, 1, s->mb_num);
+    CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
+    memset(s->mbintra_table, 1, mb_array_size);
      
      /* default structure is frame */
      s->picture_structure = PICT_FRAME;
      
      /* init macroblock skip table */
-    CHECKED_ALLOCZ(s->mbskip_table, s->mb_num+1);
+    CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
      //Note the +1 is for a quicker mpeg4 slice_end detection
      CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
      
@@ -455,13 +476,23 @@ void MPV_common_end(MpegEncContext *s)
  {
      int i;
  
+    av_freep(&s->parse_context.buffer);
+    s->parse_context.buffer_size=0;
+
      av_freep(&s->mb_type);
-    av_freep(&s->p_mv_table);
-    av_freep(&s->b_forw_mv_table);
-    av_freep(&s->b_back_mv_table);
-    av_freep(&s->b_bidir_forw_mv_table);
-    av_freep(&s->b_bidir_back_mv_table);
-    av_freep(&s->b_direct_mv_table);
+    av_freep(&s->p_mv_table_base);
+    av_freep(&s->b_forw_mv_table_base);
+    av_freep(&s->b_back_mv_table_base);
+    av_freep(&s->b_bidir_forw_mv_table_base);
+    av_freep(&s->b_bidir_back_mv_table_base);
+    av_freep(&s->b_direct_mv_table_base);
+    s->p_mv_table= NULL;
+    s->b_forw_mv_table= NULL;
+    s->b_back_mv_table= NULL;
+    s->b_bidir_forw_mv_table= NULL;
+    s->b_bidir_back_mv_table= NULL;
+    s->b_direct_mv_table= NULL;
+    
      av_freep(&s->motion_val);
      av_freep(&s->dc_val[0]);
      av_freep(&s->ac_val[0]);
@@ -479,16 +510,17 @@ void MPV_common_end(MpegEncContext *s)
      av_freep(&s->tex_pb_buffer);
      av_freep(&s->pb2_buffer);
      av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
-    av_freep(&s->co_located_type_table);
      av_freep(&s->field_mv_table);
      av_freep(&s->field_select_table);
      av_freep(&s->avctx->stats_out);
      av_freep(&s->ac_stats);
      av_freep(&s->error_status_table);
+    av_freep(&s->mb_index2xy);
  
      for(i=0; i<MAX_PICTURE_COUNT; i++){
          free_picture(s, &s->picture[i]);
      }
+    avcodec_default_free_buffers(s->avctx);
      s->context_initialized = 0;
  }
  
@@ -499,8 +531,7 @@ int MPV_encode_init(AVCodecContext *avctx)
  {
      MpegEncContext *s = avctx->priv_data;
      int i;
-
-    avctx->pix_fmt = PIX_FMT_YUV420P;
+    int chroma_h_shift, chroma_v_shift;
  
      s->bit_rate = avctx->bit_rate;
      s->bit_rate_tolerance = avctx->bit_rate_tolerance;
@@ -551,21 +582,65 @@ int MPV_encode_init(AVCodecContext *avctx)
      
      s->progressive_sequence= !(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
  
+    if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4){
+        fprintf(stderr, "4MV not supporetd by codec\n");
+        return -1;
+    }
+    
+    if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
+        fprintf(stderr, "qpel not supporetd by codec\n");
+        return -1;
+    }
+
+    if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
+        fprintf(stderr, "data partitioning not supporetd by codec\n");
+        return -1;
+    }
+    
+    if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO){
+        fprintf(stderr, "b frames not supporetd by codec\n");
+        return -1;
+    }
+    
+    if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
+        fprintf(stderr, "mpeg2 style quantization not supporetd by codec\n");
+        return -1;
+    }
+        
+    if(s->codec_id==CODEC_ID_MJPEG){
+        s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
+        s->inter_quant_bias= 0;
+    }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO){
+        s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
+        s->inter_quant_bias= 0;
+    }else{
+        s->intra_quant_bias=0;
+        s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
+    }
+    
+    if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
+        s->intra_quant_bias= avctx->intra_quant_bias;
+    if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
+        s->inter_quant_bias= avctx->inter_quant_bias;
+        
+    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
+
      switch(avctx->codec->id) {
      case CODEC_ID_MPEG1VIDEO:
          s->out_format = FMT_MPEG1;
          s->low_delay= 0; //s->max_b_frames ? 0 : 1;
          avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
          break;
+    case CODEC_ID_LJPEG:
      case CODEC_ID_MJPEG:
          s->out_format = FMT_MJPEG;
          s->intra_only = 1; /* force intra only for jpeg */
          s->mjpeg_write_tables = 1; /* write all tables */
         s->mjpeg_data_only_frames = 0; /* write all the needed headers */
-        s->mjpeg_vsample[0] = 2; /* set up default sampling factors */
-        s->mjpeg_vsample[1] = 1; /* the only currently supported values */
+        s->mjpeg_vsample[0] = 1<<chroma_v_shift;
+        s->mjpeg_vsample[1] = 1;
          s->mjpeg_vsample[2] = 1; 
-        s->mjpeg_hsample[0] = 2;
+        s->mjpeg_hsample[0] = 1<<chroma_h_shift;
          s->mjpeg_hsample[1] = 1; 
          s->mjpeg_hsample[2] = 1; 
          if (mjpeg_init(s) < 0)
@@ -595,6 +670,14 @@ int MPV_encode_init(AVCodecContext *avctx)
          avctx->delay=0;
          s->low_delay=1;
          break;
+    case CODEC_ID_FLV1:
+        s->out_format = FMT_H263;
+        s->h263_flv = 2; /* format = 1; 11-bit codes */
+        s->unrestricted_mv = 1;
+        s->rtp_mode=0; /* don't allow GOB */
+        avctx->delay=0;
+        s->low_delay=1;
+        break;
      case CODEC_ID_RV10:
          s->out_format = FMT_H263;
          s->h263_rv10 = 1;
@@ -632,6 +715,7 @@ int MPV_encode_init(AVCodecContext *avctx)
          s->h263_pred = 1;
          s->unrestricted_mv = 1;
          s->msmpeg4_version= 3;
+        s->flipflop_rounding=1;
          avctx->delay=0;
          s->low_delay=1;
          break;
@@ -641,6 +725,7 @@ int MPV_encode_init(AVCodecContext *avctx)
          s->h263_pred = 1;
          s->unrestricted_mv = 1;
          s->msmpeg4_version= 4;
+        s->flipflop_rounding=1;
          avctx->delay=0;
          s->low_delay=1;
          break;
@@ -650,6 +735,7 @@ int MPV_encode_init(AVCodecContext *avctx)
          s->h263_pred = 1;
          s->unrestricted_mv = 1;
          s->msmpeg4_version= 5;
+        s->flipflop_rounding=1;
          avctx->delay=0;
          s->low_delay=1;
          break;
@@ -664,8 +750,8 @@ int MPV_encode_init(AVCodecContext *avctx)
              int i;
              done=1;
  
-            default_mv_penalty= av_mallocz( sizeof(uint16_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
-            memset(default_mv_penalty, 0, sizeof(uint16_t)*(MAX_FCODE+1)*(2*MAX_MV+1));
+            default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
+            memset(default_mv_penalty, 0, sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1));
              memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));
  
              for(i=-16; i<16; i++){
@@ -836,7 +922,7 @@ static int find_unused_picture(MpegEncContext *s, int shared){
          }
      }else{
          for(i=0; i<MAX_PICTURE_COUNT; i++){
-            if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) break;
+            if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) break; //FIXME
          }
          for(i=0; i<MAX_PICTURE_COUNT; i++){
              if(s->picture[i].data[0]==NULL) break;
@@ -854,7 +940,9 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
      AVFrame *pic;
  
      s->mb_skiped = 0;
-    
+
+    assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
+
      /* mark&release old frames */
      if (s->pict_type != B_TYPE && s->last_picture_ptr) {
          avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
@@ -870,13 +958,19 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
              }
          }
      }
-    
  alloc:
      if(!s->encoding){
+        /* release non-reference frames */
+        for(i=0; i<MAX_PICTURE_COUNT; i++){
+            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
+                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
+            }
+        }
+
          i= find_unused_picture(s, 0);
      
          pic= (AVFrame*)&s->picture[i];
-        pic->reference= s->pict_type != B_TYPE;
+        pic->reference= s->pict_type != B_TYPE ? 3 : 0;
  
          if(s->current_picture_ptr)
              pic->coded_picture_number= s->current_picture_ptr->coded_picture_number+1;
@@ -886,11 +980,18 @@ alloc:
          s->current_picture_ptr= &s->picture[i];
      }
  
+    s->current_picture_ptr->pict_type= s->pict_type;
+    s->current_picture_ptr->quality= s->qscale;
+    s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
+
+    s->current_picture= *s->current_picture_ptr;
+  
+  if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
      if (s->pict_type != B_TYPE) {
          s->last_picture_ptr= s->next_picture_ptr;
          s->next_picture_ptr= s->current_picture_ptr;
      }
-    s->current_picture= *s->current_picture_ptr;
+    
      if(s->last_picture_ptr) s->last_picture= *s->last_picture_ptr;
      if(s->next_picture_ptr) s->next_picture= *s->next_picture_ptr;
      if(s->new_picture_ptr ) s->new_picture = *s->new_picture_ptr;
@@ -912,6 +1013,7 @@ alloc:
          assert(s->pict_type != B_TYPE); //these should have been dropped if we dont have a reference
          goto alloc;
      }
+  }
     
      s->hurry_up= s->avctx->hurry_up;
      s->error_resilience= avctx->error_resilience;
@@ -934,7 +1036,7 @@ void MPV_frame_end(MpegEncContext *s)
  {
      int i;
      /* draw edge for correct motion prediction if outside */
-    if(s->codec_id!=CODEC_ID_SVQ1){
+    if(s->codec_id!=CODEC_ID_SVQ1 && s->codec_id != CODEC_ID_MPEG1VIDEO){
          if (s->pict_type != B_TYPE && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
              draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
              draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
@@ -957,28 +1059,15 @@ void MPV_frame_end(MpegEncContext *s)
      }
      assert(i<MAX_PICTURE_COUNT);
  #endif    
-    s->current_picture_ptr->quality= s->qscale; //FIXME get average of qscale_table
-    s->current_picture_ptr->pict_type= s->pict_type;
-    s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
  
-    /* release non refernce frames */
-    for(i=0; i<MAX_PICTURE_COUNT; i++){
-        if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/)
-            s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
-    }
-    if(s->avctx->debug&FF_DEBUG_SKIP){
-        int x,y;        
-        for(y=0; y<s->mb_height; y++){
-            for(x=0; x<s->mb_width; x++){
-                int count= s->mbskip_table[x + y*s->mb_width];
-                if(count>9) count=9;
-                printf(" %1d", count);
+    if(s->encoding){
+        /* release non-reference frames */
+        for(i=0; i<MAX_PICTURE_COUNT; i++){
+            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
+                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
              }
-            printf("\n");
          }
-        printf("pict type: %d\n", s->pict_type);
      }
-
      // clear copies, to avoid confusion
  #if 0
      memset(&s->last_picture, 0, sizeof(Picture));
@@ -987,6 +1076,193 @@ void MPV_frame_end(MpegEncContext *s)
  #endif
  }
  
+/**
+ * Draws a line between (sx, sy) and (ex, ey) by adding color to the samples of buf along it.
+ * @param w width of the image
+ * @param h height of the image
+ * @param stride stride/linesize of the image
+ * @param color value added to every sample the line touches
+ */
+static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
+    int t, x, y, f;
+    
+    sx= clip(sx, 0, w-1); //clip() is defined elsewhere; presumably it clamps, keeping both end points inside the image
+    sy= clip(sy, 0, h-1);
+    ex= clip(ex, 0, w-1);
+    ey= clip(ey, 0, h-1);
+    
+    buf[sy*stride + sx]+= color; //the start sample is touched here, before either loop below runs
+    
+    if(ABS(ex - sx) > ABS(ey - sy)){ //wider than tall: step along x (assuming ABS() is the absolute value, ex != sx here)
+        if(sx > ex){ //swap the end points so that x increases
+            t=sx; sx=ex; ex=t;
+            t=sy; sy=ey; ey=t;
+        }
+        buf+= sx + sy*stride; //from here on x and y are offsets relative to the start sample
+        ex-= sx; //ex now holds the horizontal extent
+        f= ((ey-sy)<<16)/ex; //slope dy/dx scaled by 1<<16
+        for(x= 0; x <= ex; x++){
+            y= ((x*f) + (1<<15))>>16; //scale back down, adding half a unit first to round
+            buf[y*stride + x]+= color;
+        }
+    }else{ //at least as tall as wide: step along y
+        if(sy > ey){ //swap the end points so that y increases
+            t=sx; sx=ex; ex=t;
+            t=sy; sy=ey; ey=t;
+        }
+        buf+= sx + sy*stride; //from here on x and y are offsets relative to the start sample
+        ey-= sy; //ey now holds the vertical extent
+        if(ey) f= ((ex-sx)<<16)/ey; //slope dx/dy scaled by 1<<16
+        else   f= 0; //zero vertical extent: skip the division
+        for(y= 0; y <= ey; y++){
+            x= ((y*f) + (1<<15))>>16; //scale back down, adding half a unit first to round
+            buf[y*stride + x]+= color;
+        }
+    }
+}
+
+/**
+ * Draws an arrow from (ex, ey) to (sx, sy); the two head strokes start at (sx, sy).
+ * @param w width of the image
+ * @param h height of the image
+ * @param stride stride/linesize of the image
+ * @param color value passed on to draw_line() for every stroke of the arrow
+ */
+static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){ 
+    int dx,dy;
+
+    sx= clip(sx, -100, w+100); //limit the end points to 100 samples around the image (presumably to bound the products below)
+    sy= clip(sy, -100, h+100);
+    ex= clip(ex, -100, w+100);
+    ey= clip(ey, -100, h+100);
+    
+    dx= ex - sx;
+    dy= ey - sy;
+    
+    if(dx*dx + dy*dy > 3*3){ //the head is only drawn when the squared length exceeds 3*3
+        int rx=  dx + dy; //(rx, ry) is a sum/difference combination of (dx, dy) used as the head stroke direction
+        int ry= -dx + dy;
+        int length= ff_sqrt((rx*rx + ry*ry)<<8); //ff_sqrt() is defined elsewhere; presumably an integer square root
+        
+        //FIXME subpixel accuracy
+        rx= ROUNDED_DIV(rx*3<<4, length); //rescale the stroke vector (NOTE(review): looks like a length of about 3 samples - confirm)
+        ry= ROUNDED_DIV(ry*3<<4, length);
+        
+        draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color); //first head stroke
+        draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color); //second head stroke, using (-ry, rx)
+    }
+    draw_line(buf, sx, sy, ex, ey, w, h, stride, color); //the shaft, always drawn
+}
+
+/**
+ * Prints per-macroblock debug text (skip count, QP, MB type) and/or draws motion vectors into pict, as selected by s->avctx->debug.
+ */
+void ff_print_debug_info(MpegEncContext *s, Picture *pict){
+
+    if(!pict || !pict->mb_type) return; //nothing is done without a picture that carries an mb_type table
+
+    if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){ //text dump: one printed row per macroblock row
+        int x,y;
+
+        for(y=0; y<s->mb_height; y++){
+            for(x=0; x<s->mb_width; x++){
+                if(s->avctx->debug&FF_DEBUG_SKIP){
+                    int count= s->mbskip_table[x + y*s->mb_stride]; //read from the context table, not from pict
+                    if(count>9) count=9; //cap at 9 so the value stays a single digit
+                    printf("%1d", count);
+                }
+                if(s->avctx->debug&FF_DEBUG_QP){
+                    printf("%2d", pict->qscale_table[x + y*s->mb_stride]);
+                }
+                if(s->avctx->debug&FF_DEBUG_MB_TYPE){
+                    int mb_type= pict->mb_type[x + y*s->mb_stride];
+                    
+                    //Type & MV direction (first character; the first matching test wins)
+                    if(IS_PCM(mb_type))
+                        printf("P");
+                    else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
+                        printf("A");
+                    else if(IS_INTRA4x4(mb_type))
+                        printf("i");
+                    else if(IS_INTRA16x16(mb_type))
+                        printf("I");
+                    else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
+                        printf("d");
+                    else if(IS_DIRECT(mb_type))
+                        printf("D");
+                    else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
+                        printf("g");
+                    else if(IS_GMC(mb_type))
+                        printf("G");
+                    else if(IS_SKIP(mb_type))
+                        printf("S");
+                    else if(!USES_LIST(mb_type, 1))
+                        printf(">");
+                    else if(!USES_LIST(mb_type, 0))
+                        printf("<");
+                    else{
+                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
+                        printf("X");
+                    }
+                    
+                    //segmentation (second character)
+                    if(IS_8X8(mb_type))
+                        printf("+");
+                    else if(IS_16X8(mb_type))
+                        printf("-");
+                    else if(IS_8X16(mb_type))
+                        printf("¦");
+                    else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
+                        printf(" ");
+                    else
+                        printf("?");
+                    
+                        
+                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264) //third character: '=' only for interlaced H264 macroblocks
+                        printf("=");
+                    else
+                        printf(" ");
+                }
+//                printf(" ");
+            }
+            printf("\n");
+        }
+    }
+    
+    if((s->avctx->debug&FF_DEBUG_VIS_MV) && s->motion_val){ //draw the motion vectors into plane 0 of pict
+        const int shift= 1 + s->quarter_sample; //right shift applied to each vector component before it is added to a sample position
+        int mb_y;
+        uint8_t *ptr= pict->data[0];
+        s->low_delay=0; //needed to see the vectors without trashing the buffers
+
+        for(mb_y=0; mb_y<s->mb_height; mb_y++){
+            int mb_x;
+            for(mb_x=0; mb_x<s->mb_width; mb_x++){
+                const int mb_index= mb_x + mb_y*s->mb_stride;
+                if(IS_8X8(s->current_picture.mb_type[mb_index])){ //NOTE(review): uses s->current_picture here, not pict - confirm they are the same picture
+                    int i;
+                    for(i=0; i<4; i++){ //one arrow per 8x8 block, ending at the block centre
+                        int sx= mb_x*16 + 4 + 8*(i&1);
+                        int sy= mb_y*16 + 4 + 8*(i>>1);
+                        int xy= 1 + mb_x*2 + (i&1) + (mb_y*2 + 1 + (i>>1))*(s->mb_width*2 + 2); //index into s->motion_val; rows are mb_width*2+2 entries wide
+                        int mx= (s->motion_val[xy][0]>>shift) + sx;
+                        int my= (s->motion_val[xy][1]>>shift) + sy;
+                        draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
+                    }
+                }else{ //one arrow per macroblock, ending at the macroblock centre
+                    int sx= mb_x*16 + 8;
+                    int sy= mb_y*16 + 8;
+                    int xy= 1 + mb_x*2 + (mb_y*2 + 1)*(s->mb_width*2 + 2); //entry of the macroblock's first 8x8 block
+                    int mx= (s->motion_val[xy][0]>>shift) + sx;
+                    int my= (s->motion_val[xy][1]>>shift) + sy;
+                    draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100);
+                }
+                s->mbskip_table[mb_index]=0; //clear the skip entry of every macroblock visited (presumably so drawn-over blocks are not treated as skipped - confirm)
+            }
+        }
+    }
+}
+
  #ifdef CONFIG_ENCODERS
  
  static int get_sae(uint8_t *src, int ref, int stride){
@@ -1040,7 +1316,7 @@ static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
          i= find_unused_picture(s, 1);
  
          pic= (AVFrame*)&s->picture[i];
-        pic->reference= 1;
+        pic->reference= 3;
      
          for(i=0; i<4; i++){
              pic->data[i]= pic_arg->data[i];
@@ -1051,7 +1327,7 @@ static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
          i= find_unused_picture(s, 0);
  
          pic= (AVFrame*)&s->picture[i];
-        pic->reference= 1;
+        pic->reference= 3;
  
          alloc_picture(s, (Picture*)pic, 0);
          for(i=0; i<4; i++){
@@ -1196,7 +1472,7 @@ static void select_input_picture(MpegEncContext *s){
      }
      
      if(s->reordered_input_picture[0]){
-        s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE;
+        s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
  
          s->new_picture= *s->reordered_input_picture[0];
  
@@ -1338,7 +1614,7 @@ static inline void gmc1_motion(MpegEncContext *s,
      if(s->flags&CODEC_FLAG_EMU_EDGE){
          if(src_x<0 || src_y<0 || src_x + 17 >= s->h_edge_pos
                                || src_y + 17 >= s->v_edge_pos){
-            ff_emulated_edge_mc(s, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
+            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
              ptr= s->edge_emu_buffer;
          }
      }
@@ -1377,7 +1653,7 @@ static inline void gmc1_motion(MpegEncContext *s,
      if(s->flags&CODEC_FLAG_EMU_EDGE){
          if(src_x<0 || src_y<0 || src_x + 9 >= s->h_edge_pos>>1
                                || src_y + 9 >= s->v_edge_pos>>1){
-            ff_emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
+            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
              ptr= s->edge_emu_buffer;
              emu=1;
          }
@@ -1386,7 +1662,7 @@ static inline void gmc1_motion(MpegEncContext *s,
      
      ptr = ref_picture[2] + offset;
      if(emu){
-        ff_emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
+        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
          ptr= s->edge_emu_buffer;
      }
      s->dsp.gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
@@ -1457,12 +1733,22 @@ static inline void gmc_motion(MpegEncContext *s,
             s->h_edge_pos>>1, s->v_edge_pos>>1);
  }
  
-
-void ff_emulated_edge_mc(MpegEncContext *s, uint8_t *src, int linesize, int block_w, int block_h, 
+/**
+ * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
+ * @param buf destination buffer
+ * @param src source buffer
+ * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
+ * @param block_w width of block
+ * @param block_h height of block
+ * @param src_x x coordinate of the top left sample of the block in the source buffer
+ * @param src_y y coordinate of the top left sample of the block in the source buffer
+ * @param w width of the source buffer
+ * @param h height of the source buffer
+ */
+void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h, 
                                      int src_x, int src_y, int w, int h){
      int x, y;
      int start_y, start_x, end_y, end_x;
-    uint8_t *buf= s->edge_emu_buffer;
  
      if(src_y>= h){
          src+= (h-1-src_y)*linesize;
@@ -1558,7 +1844,7 @@ if(s->quarter_sample)
      if(s->flags&CODEC_FLAG_EMU_EDGE){
          if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 16 > s->h_edge_pos
                                || src_y + (motion_y&1) + h  > v_edge_pos){
-            ff_emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based,  //FIXME linesize? and uv below
+            ff_emulated_edge_mc(s->edge_emu_buffer, ptr - src_offset, s->linesize, 17, 17+field_based,  //FIXME linesize? and uv below
                               src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
              ptr= s->edge_emu_buffer + src_offset;
              emu=1;
@@ -1595,7 +1881,7 @@ if(s->quarter_sample)
      offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
      ptr = ref_picture[1] + offset;
      if(emu){
-        ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, 
+        ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, 
                           src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
          ptr= s->edge_emu_buffer + (src_offset >> 1);
      }
@@ -1603,7 +1889,7 @@ if(s->quarter_sample)
  
      ptr = ref_picture[2] + offset;
      if(emu){
-        ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, 
+        ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, 
                           src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
          ptr= s->edge_emu_buffer + (src_offset >> 1);
      }
@@ -1643,7 +1929,7 @@ static inline void qpel_motion(MpegEncContext *s,
      if(s->flags&CODEC_FLAG_EMU_EDGE){
          if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 16 > s->h_edge_pos
                                || src_y + (motion_y&3) + h  > v_edge_pos){
-            ff_emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based, 
+            ff_emulated_edge_mc(s->edge_emu_buffer, ptr - src_offset, s->linesize, 17, 17+field_based, 
                               src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
              ptr= s->edge_emu_buffer + src_offset;
              emu=1;
@@ -1693,7 +1979,7 @@ static inline void qpel_motion(MpegEncContext *s,
      offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
      ptr = ref_picture[1] + offset;
      if(emu){
-        ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, 
+        ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, 
                           src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
          ptr= s->edge_emu_buffer + (src_offset >> 1);
      }
@@ -1701,7 +1987,7 @@ static inline void qpel_motion(MpegEncContext *s,
      
      ptr = ref_picture[2] + offset;
      if(emu){
-        ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, 
+        ff_emulated_edge_mc(s->edge_emu_buffer, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, 
                           src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
          ptr= s->edge_emu_buffer + (src_offset >> 1);
      }
@@ -1717,6 +2003,18 @@ inline int ff_h263_round_chroma(int x){
      }
  }
  
+/**
+ * motion compensation of a single macroblock
+ * @param s context
+ * @param dest_y luma destination pointer
+ * @param dest_cb chroma cb/u destination pointer
+ * @param dest_cr chroma cr/v destination pointer
+ * @param dir direction (0->forward, 1->backward)
+ * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
+ * @param pix_op halfpel motion compensation function (average or put normally)
+ * @param qpix_op qpel motion compensation function (average or put normally)
+ * the motion vectors are taken from s->mv and the MV type from s->mv_type
+ */
  static inline void MPV_motion(MpegEncContext *s, 
                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                                int dir, uint8_t **ref_picture, 
@@ -1783,7 +2081,7 @@ static inline void MPV_motion(MpegEncContext *s,
                  if(s->flags&CODEC_FLAG_EMU_EDGE){
                      if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 8 > s->h_edge_pos
                                            || src_y + (motion_y&3) + 8 > s->v_edge_pos){
-                        ff_emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
+                        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
                          ptr= s->edge_emu_buffer;
                      }
                  }
@@ -1814,7 +2112,7 @@ static inline void MPV_motion(MpegEncContext *s,
                  if(s->flags&CODEC_FLAG_EMU_EDGE){
                      if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 8 > s->h_edge_pos
                                            || src_y + (motion_y&1) + 8 > s->v_edge_pos){
-                        ff_emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
+                        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
                          ptr= s->edge_emu_buffer;
                      }
                  }
@@ -1849,7 +2147,7 @@ static inline void MPV_motion(MpegEncContext *s,
          if(s->flags&CODEC_FLAG_EMU_EDGE){
                  if(src_x<0 || src_y<0 || src_x + (dxy &1) + 8 > s->h_edge_pos>>1
                                        || src_y + (dxy>>1) + 8 > s->v_edge_pos>>1){
-                    ff_emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
+                    ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
                      ptr= s->edge_emu_buffer;
                      emu=1;
                  }
@@ -1858,7 +2156,7 @@ static inline void MPV_motion(MpegEncContext *s,
  
          ptr = ref_picture[2] + offset;
          if(emu){
-            ff_emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
+            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
              ptr= s->edge_emu_buffer;
          }
          pix_op[1][dxy](dest_cr, ptr, s->uvlinesize, 8);
@@ -1903,6 +2201,105 @@ static inline void MPV_motion(MpegEncContext *s,
                          s->mv[dir][0][0], s->mv[dir][0][1], 16);
          }
          break;
+    case MV_TYPE_16X8:{
+        int offset;
+         uint8_t ** ref2picture;
+
+            if(s->picture_structure == s->field_select[dir][0] + 1 || s->pict_type == B_TYPE || s->first_field){
+                ref2picture= ref_picture;
+                offset= s->field_select[dir][0] ? s->linesize : 0;
+            }else{
+                ref2picture= s->current_picture.data;
+                offset= s->field_select[dir][0] ? s->linesize : -s->linesize; 
+            } 
+
+            mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
+                        ref2picture, offset,
+                        0, pix_op,
+                        s->mv[dir][0][0], s->mv[dir][0][1], 8);
+
+
+            if(s->picture_structure == s->field_select[dir][1] + 1 || s->pict_type == B_TYPE || s->first_field){
+                ref2picture= ref_picture;
+                offset= s->field_select[dir][1] ? s->linesize : 0;
+            }else{
+                ref2picture= s->current_picture.data;
+                offset= s->field_select[dir][1] ? s->linesize : -s->linesize; 
+            } 
+            // I know it is ugly but this is the only way to fool emu_edge without rewriting mpeg_motion
+            mpeg_motion(s, dest_y+16*s->linesize, dest_cb+8*s->uvlinesize, dest_cr+8*s->uvlinesize,
+                        0,
+                        ref2picture, offset,
+                        0, pix_op,
+                        s->mv[dir][1][0], s->mv[dir][1][1]+16, 8);
+        }
+        
+        break;
+    case MV_TYPE_DMV:
+    {
+    op_pixels_func (*dmv_pix_op)[4];
+    int offset;
+
+        dmv_pix_op = s->dsp.put_pixels_tab;
+
+        if(s->picture_structure == PICT_FRAME){
+            //put top field from top field
+            mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
+                        ref_picture, 0,
+                        1, dmv_pix_op,
+                        s->mv[dir][0][0], s->mv[dir][0][1], 8);
+            //put bottom field from bottom field
+            mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
+                        ref_picture, s->linesize,
+                        1, dmv_pix_op,
+                        s->mv[dir][0][0], s->mv[dir][0][1], 8);
+
+            dmv_pix_op = s->dsp.avg_pixels_tab; 
+        
+            //avg top field from bottom field
+            mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
+                        ref_picture, s->linesize,
+                        1, dmv_pix_op,
+                        s->mv[dir][2][0], s->mv[dir][2][1], 8);
+            //avg bottom field from top field
+            mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
+                        ref_picture, 0,
+                        1, dmv_pix_op,
+                        s->mv[dir][3][0], s->mv[dir][3][1], 8);
+
+        }else{
+            offset=(s->picture_structure == PICT_BOTTOM_FIELD)? 
+                         s->linesize : 0;
+
+            //put field from the same parity
+            //same parity is never in the same frame
+            mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
+                        ref_picture,offset,
+                        0,dmv_pix_op,
+                        s->mv[dir][0][0],s->mv[dir][0][1],16);
+
+            // after put we make avg of the same block
+            dmv_pix_op=s->dsp.avg_pixels_tab; 
+
+            //opposite parity is always in the same frame if this is second field
+            if(!s->first_field){
+                ref_picture = s->current_picture.data;    
+                //top field is one linesize from frame beginning
+                offset=(s->picture_structure == PICT_BOTTOM_FIELD)? 
+                        -s->linesize : s->linesize;
+            }else 
+                offset=(s->picture_structure == PICT_BOTTOM_FIELD)? 
+                        0 : s->linesize;
+
+            //avg field from the opposite parity
+            mpeg_motion(s, dest_y, dest_cb, dest_cr,0,
+                        ref_picture, offset,
+                        0,dmv_pix_op,
+                        s->mv[dir][2][0],s->mv[dir][2][1],16);
+        }
+    }
+    break;
+
      }
  }
  
@@ -1964,7 +2361,7 @@ void ff_clean_intra_table_entries(MpegEncContext *s)
      memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
      memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
      
-    s->mbintra_table[s->mb_x + s->mb_y*s->mb_width]= 0;
+    s->mbintra_table[s->mb_x + s->mb_y*s->mb_stride]= 0;
  }
  
  /* generic function called after a macroblock has been parsed by the
@@ -1980,7 +2377,7 @@ void ff_clean_intra_table_entries(MpegEncContext *s)
  void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
  {
      int mb_x, mb_y;
-    const int mb_xy = s->mb_y * s->mb_width + s->mb_x;
+    const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
  
      mb_x = s->mb_x;
      mb_y = s->mb_y;
@@ -2006,33 +2403,26 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
          //FIXME a lot of thet is only needed for !low_delay
          const int wrap = s->block_wrap[0];
          const int xy = s->block_index[0];
-        const int mb_index= s->mb_x + s->mb_y*s->mb_width;
-        if(s->mv_type == MV_TYPE_8X8){
-            s->co_located_type_table[mb_index]= CO_LOCATED_TYPE_4MV;
-        } else {
+        if(s->mv_type != MV_TYPE_8X8){
              int motion_x, motion_y;
              if (s->mb_intra) {
                  motion_x = 0;
                  motion_y = 0;
-                if(s->co_located_type_table)
-                    s->co_located_type_table[mb_index]= 0;
              } else if (s->mv_type == MV_TYPE_16X16) {
                  motion_x = s->mv[0][0][0];
                  motion_y = s->mv[0][0][1];
-                if(s->co_located_type_table)
-                    s->co_located_type_table[mb_index]= 0;
              } else /*if (s->mv_type == MV_TYPE_FIELD)*/ {
                  int i;
                  motion_x = s->mv[0][0][0] + s->mv[0][1][0];
                  motion_y = s->mv[0][0][1] + s->mv[0][1][1];
                  motion_x = (motion_x>>1) | (motion_x&1);
                  for(i=0; i<2; i++){
-                    s->field_mv_table[mb_index][i][0]= s->mv[0][i][0];
-                    s->field_mv_table[mb_index][i][1]= s->mv[0][i][1];
-                    s->field_select_table[mb_index][i]= s->field_select[0][i];
+                    s->field_mv_table[mb_xy][i][0]= s->mv[0][i][0];
+                    s->field_mv_table[mb_xy][i][1]= s->mv[0][i][1];
+                    s->field_select_table[mb_xy][i]= s->field_select[0][i];
                  }
-                s->co_located_type_table[mb_index]= CO_LOCATED_TYPE_FIELDMV;
              }
+            
              /* no update if 8X8 because it has been done during parsing */
              s->motion_val[xy][0] = motion_x;
              s->motion_val[xy][1] = motion_y;
@@ -2043,6 +2433,13 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
              s->motion_val[xy + 1 + wrap][0] = motion_x;
              s->motion_val[xy + 1 + wrap][1] = motion_y;
          }
+
+        if(s->encoding){ //FIXME encoding MUST be cleaned up
+            if (s->mv_type == MV_TYPE_8X8) 
+                s->current_picture.mb_type[mb_xy]= MB_TYPE_L0 | MB_TYPE_8x8;
+            else
+                s->current_picture.mb_type[mb_xy]= MB_TYPE_L0 | MB_TYPE_16x16;
+        }
      }
      
      if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
@@ -2368,7 +2765,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
      for(i=0; i<6; i++) skip_dct[i]=0;
      
      if(s->adaptive_quant){
-        s->dquant= s->current_picture.qscale_table[mb_x + mb_y*s->mb_width] - s->qscale;
+        s->dquant= s->current_picture.qscale_table[mb_x + mb_y*s->mb_stride] - s->qscale;
  
          if(s->out_format==FMT_H263){
              if     (s->dquant> 2) s->dquant= 2;
@@ -2377,10 +2774,10 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
              
          if(s->codec_id==CODEC_ID_MPEG4){        
              if(!s->mb_intra){
-                assert(s->dquant==0 || s->mv_type!=MV_TYPE_8X8);
-
                  if(s->mv_dir&MV_DIRECT)
                      s->dquant=0;
+
+                assert(s->dquant==0 || s->mv_type!=MV_TYPE_8X8);
              }
          }
          s->qscale+= s->dquant;
@@ -2397,7 +2794,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
          ptr = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
  
          if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
-            ff_emulated_edge_mc(s, ptr, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
+            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
              ptr= s->edge_emu_buffer;
              emu=1;
          }
@@ -2429,14 +2826,14 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
              int wrap_c = s->uvlinesize;
              ptr = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
              if(emu){
-                ff_emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
+                ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
                  ptr= s->edge_emu_buffer;
              }
             s->dsp.get_pixels(s->block[4], ptr, wrap_c);
  
              ptr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
              if(emu){
-                ff_emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
+                ff_emulated_edge_mc(s->edge_emu_buffer, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
                  ptr= s->edge_emu_buffer;
              }
              s->dsp.get_pixels(s->block[5], ptr, wrap_c);
@@ -2476,7 +2873,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
          }
  
          if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
-            ff_emulated_edge_mc(s, ptr_y, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
+            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
              ptr_y= s->edge_emu_buffer;
              emu=1;
          }
@@ -2508,18 +2905,18 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
              skip_dct[5]= 1;
          }else{
              if(emu){
-                ff_emulated_edge_mc(s, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
+                ff_emulated_edge_mc(s->edge_emu_buffer, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
                  ptr_cb= s->edge_emu_buffer;
              }
              s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
              if(emu){
-                ff_emulated_edge_mc(s, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
+                ff_emulated_edge_mc(s->edge_emu_buffer, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
                  ptr_cr= s->edge_emu_buffer;
              }
              s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
          }
          /* pre quantization */         
-        if(s->current_picture.mc_mb_var[s->mb_width*mb_y+ mb_x]<2*s->qscale*s->qscale){
+        if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
              //FIXME optimize
             if(s->dsp.pix_abs8x8(ptr_y               , dest_y               , wrap_y) < 20*s->qscale) skip_dct[0]= 1;
              if(s->dsp.pix_abs8x8(ptr_y            + 8, dest_y            + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1;
@@ -2550,13 +2947,13 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
              {
                  float adap_parm;
                  
-                adap_parm = ((s->avg_mb_var << 1) + s->mb_var[s->mb_width*mb_y+mb_x] + 1.0) /
-                            ((s->mb_var[s->mb_width*mb_y+mb_x] << 1) + s->avg_mb_var + 1.0);
+                adap_parm = ((s->avg_mb_var << 1) + s->mb_var[s->mb_stride*mb_y+mb_x] + 1.0) /
+                            ((s->mb_var[s->mb_stride*mb_y+mb_x] << 1) + s->avg_mb_var + 1.0);
              
                  printf("\ntype=%c qscale=%2d adap=%0.2f dquant=%4.2f var=%4d avgvar=%4d", 
-                        (s->mb_type[s->mb_width*mb_y+mb_x] > 0) ? 'I' : 'P', 
+                        (s->mb_type[s->mb_stride*mb_y+mb_x] > 0) ? 'I' : 'P', 
                          s->qscale, adap_parm, s->qscale*adap_parm,
-                        s->mb_var[s->mb_width*mb_y+mb_x], s->avg_mb_var);
+                        s->mb_var[s->mb_stride*mb_y+mb_x], s->avg_mb_var);
              }
  #endif
      /* DCT & quantize */
@@ -2608,6 +3005,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
           ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
      case CODEC_ID_H263:
      case CODEC_ID_H263P:
+    case CODEC_ID_FLV1:
      case CODEC_ID_RV10:
          h263_encode_mb(s, s->block, motion_x, motion_y); break;
  #endif
@@ -2626,10 +3024,23 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
   */
  int ff_combine_frame( MpegEncContext *s, int next, uint8_t **buf, int *buf_size){
      ParseContext *pc= &s->parse_context;
-        
+
+#if 0
+    if(pc->overread){
+        printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
+        printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
+    }
+#endif
+
+    /* copy overread bytes from last frame into buffer */
+    for(; pc->overread>0; pc->overread--){
+        pc->buffer[pc->index++]= pc->buffer[pc->overread_index++];
+    }
+    
      pc->last_index= pc->index;
  
-    if(next==-1){
+    /* copy into buffer and return */
+    if(next == END_NOT_FOUND){
          pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, (*buf_size) + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
  
          memcpy(&pc->buffer[pc->index], *buf, *buf_size);
@@ -2637,15 +3048,31 @@ int ff_combine_frame( MpegEncContext *s, int next, uint8_t **buf, int *buf_size)
          return -1;
      }
  
+    *buf_size=
+    pc->overread_index= pc->index + next;
+    
+    /* append to buffer */
      if(pc->index){
          pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, next + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
  
          memcpy(&pc->buffer[pc->index], *buf, next + FF_INPUT_BUFFER_PADDING_SIZE );
          pc->index = 0;
          *buf= pc->buffer;
-        *buf_size= pc->last_index + next;
      }
  
+    /* store overread bytes */
+    for(;next < 0; next++){
+        pc->state = (pc->state<<8) | pc->buffer[pc->last_index + next];
+        pc->overread++;
+    }
+
+#if 0
+    if(pc->overread){
+        printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
+        printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
+    }
+#endif
+
      return 0;
  }
  
@@ -2668,7 +3095,7 @@ static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext
      memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
  
      /* mpeg1 */
-    d->mb_incr= s->mb_incr;
+    d->mb_skip_run= s->mb_skip_run;
      for(i=0; i<3; i++)
          d->last_dc[i]= s->last_dc[i];
      
@@ -2694,7 +3121,7 @@ static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *
      memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
      
      /* mpeg1 */
-    d->mb_incr= s->mb_incr;
+    d->mb_skip_run= s->mb_skip_run;
      for(i=0; i<3; i++)
          d->last_dc[i]= s->last_dc[i];
      
@@ -2794,13 +3221,6 @@ static void encode_picture(MpegEncContext *s, int picture_number)
      }
  
      s->picture_number = picture_number;
-
-    s->block_wrap[0]=
-    s->block_wrap[1]=
-    s->block_wrap[2]=
-    s->block_wrap[3]= s->mb_width*2 + 2;
-    s->block_wrap[4]=
-    s->block_wrap[5]= s->mb_width + 2;
      
      /* Reset the average MB variance */
      s->current_picture.mb_var_sum = 0;
@@ -2818,8 +3238,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
      s->qscale= (int)(s->frame_qscale + 0.5); //FIXME qscale / ... stuff for ME ratedistoration
      
      if(s->pict_type==I_TYPE){
-        if(s->msmpeg4_version) s->no_rounding=1;
-        else                   s->no_rounding=0;
+        if(s->msmpeg4_version >= 3) s->no_rounding=1;
+        else                        s->no_rounding=0;
      }else if(s->pict_type!=B_TYPE){
          if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
              s->no_rounding ^= 1;          
@@ -2869,8 +3289,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
          /* I-Frame */
          //FIXME do we need to zero them?
          memset(s->motion_val[0], 0, sizeof(int16_t)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2);
-        memset(s->p_mv_table   , 0, sizeof(int16_t)*(s->mb_width+2)*(s->mb_height+2)*2);
-        memset(s->mb_type      , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_width*s->mb_height);
+        memset(s->p_mv_table   , 0, sizeof(int16_t)*(s->mb_stride)*s->mb_height*2);
+        memset(s->mb_type      , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height);
          
          if(!s->fixed_qscale){
              /* finding spatial complexity for I-frame rate control */
@@ -2884,8 +3304,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
      
                     varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
  
-                    s->current_picture.mb_var [s->mb_width * mb_y + mb_x] = varc;
-                    s->current_picture.mb_mean[s->mb_width * mb_y + mb_x] = (sum+128)>>8;
+                    s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
+                    s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
                      s->current_picture.mb_var_sum    += varc;
                  }
              }
@@ -2895,7 +3315,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
  
      if(s->scene_change_score > 0 && s->pict_type == P_TYPE){
          s->pict_type= I_TYPE;
-        memset(s->mb_type   , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_width*s->mb_height);
+        memset(s->mb_type   , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height);
  //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
      }
  
@@ -2937,6 +3357,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
              break;
          case CODEC_ID_H263:
          case CODEC_ID_H263P:
+        case CODEC_ID_FLV1:
              ff_clean_h263_qscales(s);
              break;
          }
@@ -3005,9 +3426,9 @@ static void encode_picture(MpegEncContext *s, int picture_number)
          /* note: quant matrix value (8) is implied here */
          s->last_dc[i] = 128;
          
-        s->current_picture.error[i] = 0;
+        s->current_picture_ptr->error[i] = 0;
      }
-    s->mb_incr = 1;
+    s->mb_skip_run = 0;
      s->last_mv[0][0][0] = 0;
      s->last_mv[0][0][1] = 0;
      s->last_mv[1][0][0] = 0;
@@ -3016,18 +3437,23 @@ static void encode_picture(MpegEncContext *s, int picture_number)
      s->last_mv_dir = 0;
  
  #ifdef CONFIG_RISKY
-    if (s->codec_id==CODEC_ID_H263 || s->codec_id==CODEC_ID_H263P)
+    switch(s->codec_id){
+    case CODEC_ID_H263:
+    case CODEC_ID_H263P:
+    case CODEC_ID_FLV1:
          s->gob_index = ff_h263_get_gob_height(s);
-
-    if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame)
-        ff_mpeg4_init_partitions(s);
+        break;
+    case CODEC_ID_MPEG4:
+        if(s->partitioned_frame)
+            ff_mpeg4_init_partitions(s);
+        break;
+    }
  #endif
  
      s->resync_mb_x=0;
      s->resync_mb_y=0;
      s->first_slice_line = 1;
      s->ptr_lastgob = s->pb.buf;
-    s->ptr_last_mb_line = s->pb.buf;
      for(mb_y=0; mb_y < s->mb_height; mb_y++) {
          s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
          s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
@@ -3039,8 +3465,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
          s->block_index[4]= s->block_wrap[4]*(mb_y + 1)                    + s->block_wrap[0]*(s->mb_height*2 + 2);
          s->block_index[5]= s->block_wrap[4]*(mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2);
          for(mb_x=0; mb_x < s->mb_width; mb_x++) {
-            int mb_type= s->mb_type[mb_y * s->mb_width + mb_x];
-            const int xy= (mb_y+1) * (s->mb_width+2) + mb_x + 1;
+            const int xy= mb_y*s->mb_stride + mb_x;
+            int mb_type= s->mb_type[xy];
  //            int d;
              int dmin=10000000;
  
@@ -3062,7 +3488,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                  is_gob_start=0;
                  
                  if(s->codec_id==CODEC_ID_MPEG4){
-                    if(current_packet_size + s->mb_line_avgsize/s->mb_width >= s->rtp_payload_size
+                    if(current_packet_size >= s->rtp_payload_size
                         && s->mb_y + s->mb_x>0){
  
                          if(s->partitioned_frame){
@@ -3079,8 +3505,15 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                          ff_mpeg4_clean_buffers(s);
                          is_gob_start=1;
                      }
+                }else if(s->codec_id==CODEC_ID_MPEG1VIDEO){
+                    if(   current_packet_size >= s->rtp_payload_size 
+                       && s->mb_y + s->mb_x>0 && s->mb_skip_run==0){
+                        ff_mpeg1_encode_slice_header(s);
+                        ff_mpeg1_clean_buffers(s);
+                        is_gob_start=1;
+                    }
                  }else{
-                    if(current_packet_size + s->mb_line_avgsize*s->gob_index >= s->rtp_payload_size
+                    if(current_packet_size >= s->rtp_payload_size
                         && s->mb_x==0 && s->mb_y>0 && s->mb_y%s->gob_index==0){
                         
                          h263_encode_gob_header(s, mb_y);                       
@@ -3186,7 +3619,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                                   &dmin, &next_block, 0, 0);
                      /* force cleaning of ac/dc pred stuff if needed ... */
                      if(s->h263_pred || s->h263_aic)
-                        s->mbintra_table[mb_x + mb_y*s->mb_width]=1;
+                        s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
                  }
                  copy_context_after_encode(s, &best_s, -1);
                  
@@ -3210,16 +3643,16 @@ static void encode_picture(MpegEncContext *s, int picture_number)
              } else {
                  int motion_x, motion_y;
                  int intra_score;
-                int inter_score= s->current_picture.mb_cmp_score[mb_x + mb_y*s->mb_width];
+                int inter_score= s->current_picture.mb_cmp_score[mb_x + mb_y*s->mb_stride];
                  
                if(!(s->flags&CODEC_FLAG_HQ) && s->pict_type==P_TYPE){
                  /* get luma score */
                  if((s->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
-                    intra_score= (s->current_picture.mb_var[mb_x + mb_y*s->mb_width]<<8) - 500; //FIXME dont scale it down so we dont have to fix it
+                    intra_score= (s->current_picture.mb_var[mb_x + mb_y*s->mb_stride]<<8) - 500; //FIXME dont scale it down so we dont have to fix it
                  }else{
                      uint8_t *dest_y;
  
-                    int mean= s->current_picture.mb_mean[mb_x + mb_y*s->mb_width]; //FIXME
+                    int mean= s->current_picture.mb_mean[mb_x + mb_y*s->mb_stride]; //FIXME
                      mean*= 0x01010101;
                      
                      dest_y  = s->new_picture.data[0] + (mb_y * 16 * s->linesize    ) + mb_x * 16;
@@ -3235,8 +3668,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                      intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, dest_y, s->linesize);
                                          
  /*                    printf("intra:%7d inter:%7d var:%7d mc_var.%7d\n", intra_score>>8, inter_score>>8, 
-                        s->current_picture.mb_var[mb_x + mb_y*s->mb_width],
-                        s->current_picture.mc_mb_var[mb_x + mb_y*s->mb_width]);*/
+                        s->current_picture.mb_var[mb_x + mb_y*s->mb_stride],
+                        s->current_picture.mc_mb_var[mb_x + mb_y*s->mb_stride]);*/
                  }
                  
                  /* get chroma score */
@@ -3377,34 +3810,23 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                  if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
                  if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
  
-                s->current_picture.error[0] += sse(
+                s->current_picture_ptr->error[0] += sse(
                      s,
                      s->new_picture    .data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
                      s->current_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
                      w, h, s->linesize);
-                s->current_picture.error[1] += sse(
+                s->current_picture_ptr->error[1] += sse(
                      s,
                      s->new_picture    .data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
                      s->current_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
                      w>>1, h>>1, s->uvlinesize);
-                s->current_picture.error[2] += sse(
+                s->current_picture_ptr->error[2] += sse(
                      s,
                      s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
                      s->current_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
                      w>>1, h>>1, s->uvlinesize);
              }
-//printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_width, get_bit_count(&s->pb));
-        }
-
-
-        /* Obtain average mb_row size for RTP */
-        if (s->rtp_mode) {
-            if (mb_y==0)
-                s->mb_line_avgsize = pbBufPtr(&s->pb) - s->ptr_last_mb_line;
-            else {    
-                s->mb_line_avgsize = (s->mb_line_avgsize + pbBufPtr(&s->pb) - s->ptr_last_mb_line) >> 1;
-            }
-            s->ptr_last_mb_line = pbBufPtr(&s->pb);
+//printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, get_bit_count(&s->pb));
          }
      }
      emms_c();
@@ -3727,8 +4149,8 @@ static int dct_quantize_c(MpegEncContext *s,
          level = block[j];
          level = level * qmat[j];
  
-//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
-//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
+//        if(   bias+level >= (1<<QMAT_SHIFT)
+//           || bias-level >= (1<<QMAT_SHIFT)){
          if(((unsigned)(level+threshold1))>threshold2){
              if(level>0){
                  level= (bias + level)>>QMAT_SHIFT;
@@ -3924,16 +4346,6 @@ static void dct_unquantize_h263_c(MpegEncContext *s,
  }
  
  
-char ff_get_pict_type_char(int pict_type){
-    switch(pict_type){
-    case I_TYPE: return 'I'; 
-    case P_TYPE: return 'P'; 
-    case B_TYPE: return 'B'; 
-    case S_TYPE: return 'S'; 
-    default:     return '?';
-    }
-}
-
  static const AVOption mpeg4_options[] =
  {
      AVOPTION_CODEC_INT("bitrate", "desired video bitrate", bit_rate, 4, 240000000, 800000),
@@ -4043,6 +4455,16 @@ AVCodec h263p_encoder = {
      MPV_encode_end,
  };
  
+AVCodec flv_encoder = {
+    "flv",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_FLV1,
+    sizeof(MpegEncContext),
+    MPV_encode_init,
+    MPV_encode_picture,
+    MPV_encode_end,
+};
+
  AVCodec rv10_encoder = {
      "rv10",
      CODEC_TYPE_VIDEO,