]> git.sesse.net Git - ffmpeg/blobdiff - libavcodec/mpegvideo.c
fastdiv patch by (BERO <bero at geocities dot co dot jp>) with fixes & cleanup by me
[ffmpeg] / libavcodec / mpegvideo.c
index 26398172656544731be24c40251634ce5c360e56..c0dd96bbece57c0564850f0bf67b0e0e22a2ae65 100644 (file)
@@ -82,7 +82,7 @@ static const uint8_t h263_chroma_roundtab[16] = {
 };
 
 #ifdef CONFIG_ENCODERS
-static uint16_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
+static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
 static uint8_t default_fcode_tab[MAX_MV*2+1];
 
 enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
@@ -136,16 +136,6 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16
 }
 #endif //CONFIG_ENCODERS
 
-// move into common.c perhaps 
-#define CHECKED_ALLOCZ(p, size)\
-{\
-    p= av_mallocz(size);\
-    if(p==NULL){\
-        perror("malloc");\
-        goto fail;\
-    }\
-}
-
 void ff_init_scantable(MpegEncContext *s, ScanTable *st, const uint8_t *src_scantable){
     int i;
     int end;
@@ -227,6 +217,9 @@ int DCT_common_init(MpegEncContext *s)
  * The pixels are allocated/set by calling get_buffer() if shared=0
  */
 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
+    const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
+    const int mb_array_size= s->mb_stride*s->mb_height;
+    int i;
     
     if(shared){
         assert(pic->data[0]);
@@ -260,17 +253,25 @@ static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
     
     if(pic->qscale_table==NULL){
         if (s->encoding) {        
-            CHECKED_ALLOCZ(pic->mb_var   , s->mb_num * sizeof(int16_t))
-            CHECKED_ALLOCZ(pic->mc_mb_var, s->mb_num * sizeof(int16_t))
-            CHECKED_ALLOCZ(pic->mb_mean  , s->mb_num * sizeof(int8_t))
-            CHECKED_ALLOCZ(pic->mb_cmp_score, s->mb_num * sizeof(int32_t))
+            CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
+            CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
+            CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
+            CHECKED_ALLOCZ(pic->mb_cmp_score, mb_array_size * sizeof(int32_t))
         }
 
-        CHECKED_ALLOCZ(pic->mbskip_table , s->mb_num * sizeof(uint8_t)+1) //the +1 is for the slice end check
-        CHECKED_ALLOCZ(pic->qscale_table , s->mb_num * sizeof(uint8_t))
-        pic->qstride= s->mb_width;
+        CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
+        CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
+        CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(int))
+        pic->mb_type= pic->mb_type_base + s->mb_stride+1;
+        if(s->out_format == FMT_H264){
+            for(i=0; i<2; i++){
+                CHECKED_ALLOCZ(pic->motion_val[i], 2 * 16 * s->mb_num * sizeof(uint16_t))
+                CHECKED_ALLOCZ(pic->ref_index[i] , 4 * s->mb_num * sizeof(uint8_t))
+            }
+        }
+        pic->qstride= s->mb_stride;
     }
-    
+
     //it might be nicer if the application would keep track of these but it would require a API change
     memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
     s->prev_pict_types[0]= s->pict_type;
@@ -298,15 +299,14 @@ static void free_picture(MpegEncContext *s, Picture *pic){
     av_freep(&pic->mb_cmp_score);
     av_freep(&pic->mbskip_table);
     av_freep(&pic->qscale_table);
+    av_freep(&pic->mb_type_base);
+    pic->mb_type= NULL;
+    for(i=0; i<2; i++){
+        av_freep(&pic->motion_val[i]);
+        av_freep(&pic->ref_index[i]);
+    }
     
-    if(pic->type == FF_BUFFER_TYPE_INTERNAL){
-        for(i=0; i<4; i++){
-            av_freep(&pic->base[i]);
-            pic->data[i]= NULL;
-        }
-        av_freep(&pic->opaque);
-        pic->type= 0;
-    }else if(pic->type == FF_BUFFER_TYPE_SHARED){
+    if(pic->type == FF_BUFFER_TYPE_SHARED){
         for(i=0; i<4; i++){
             pic->base[i]=
             pic->data[i]= NULL;
@@ -318,7 +318,7 @@ static void free_picture(MpegEncContext *s, Picture *pic){
 /* init common structure for both encoder and decoder */
 int MPV_common_init(MpegEncContext *s)
 {
-    int y_size, c_size, yc_size, i;
+    int y_size, c_size, yc_size, i, mb_array_size, x, y;
 
     dsputil_init(&s->dsp, s->avctx);
     DCT_common_init(s);
@@ -327,12 +327,21 @@ int MPV_common_init(MpegEncContext *s)
 
     s->mb_width  = (s->width  + 15) / 16;
     s->mb_height = (s->height + 15) / 16;
+    s->mb_stride = s->mb_width + 1;
+    mb_array_size= s->mb_height * s->mb_stride;
 
     /* set default edge pos, will be overriden in decode_header if needed */
     s->h_edge_pos= s->mb_width*16;
     s->v_edge_pos= s->mb_height*16;
 
     s->mb_num = s->mb_width * s->mb_height;
+    
+    s->block_wrap[0]=
+    s->block_wrap[1]=
+    s->block_wrap[2]=
+    s->block_wrap[3]= s->mb_width*2 + 2;
+    s->block_wrap[4]=
+    s->block_wrap[5]= s->mb_width + 2;
 
     y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
     c_size = (s->mb_width + 2) * (s->mb_height + 2);
@@ -349,16 +358,30 @@ int MPV_common_init(MpegEncContext *s)
 
     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
 
+    CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
+    for(y=0; y<s->mb_height; y++){
+        for(x=0; x<s->mb_width; x++){
+            s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
+        }
+    }
+    s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
+    
     if (s->encoding) {
-        int mv_table_size= (s->mb_width+2)*(s->mb_height+2);
+        int mv_table_size= s->mb_stride * (s->mb_height+2) + 1;
 
         /* Allocate MV tables */
-        CHECKED_ALLOCZ(s->p_mv_table            , mv_table_size * 2 * sizeof(int16_t))
-        CHECKED_ALLOCZ(s->b_forw_mv_table       , mv_table_size * 2 * sizeof(int16_t))
-        CHECKED_ALLOCZ(s->b_back_mv_table       , mv_table_size * 2 * sizeof(int16_t))
-        CHECKED_ALLOCZ(s->b_bidir_forw_mv_table , mv_table_size * 2 * sizeof(int16_t))
-        CHECKED_ALLOCZ(s->b_bidir_back_mv_table , mv_table_size * 2 * sizeof(int16_t))
-        CHECKED_ALLOCZ(s->b_direct_mv_table     , mv_table_size * 2 * sizeof(int16_t))
+        CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
+        CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
+        CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
+        CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
+        CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
+        CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
+        s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
+        s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
+        s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
+        s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
+        s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
+        s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;
 
         //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
         CHECKED_ALLOCZ(s->me.scratchpad,  s->width*2*16*3*sizeof(uint8_t)) 
@@ -375,14 +398,15 @@ int MPV_common_init(MpegEncContext *s)
             CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
         }
         CHECKED_ALLOCZ(s->avctx->stats_out, 256);
+
+        /* Allocate MB type table */
+        CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint8_t)) //needed for encoding
     }
         
-    CHECKED_ALLOCZ(s->error_status_table, s->mb_num*sizeof(uint8_t))
+    CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
     
     if (s->out_format == FMT_H263 || s->encoding) {
         int size;
-        /* Allocate MB type table */
-        CHECKED_ALLOCZ(s->mb_type  , s->mb_num * sizeof(uint8_t))
 
         /* MV prediction */
         size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
@@ -391,12 +415,9 @@ int MPV_common_init(MpegEncContext *s)
 
     if(s->codec_id==CODEC_ID_MPEG4){
         /* interlaced direct mode decoding tables */
-        CHECKED_ALLOCZ(s->field_mv_table, s->mb_num*2*2 * sizeof(int16_t))
-        CHECKED_ALLOCZ(s->field_select_table, s->mb_num*2* sizeof(int8_t))
+        CHECKED_ALLOCZ(s->field_mv_table, mb_array_size*2*2 * sizeof(int16_t))
+        CHECKED_ALLOCZ(s->field_select_table, mb_array_size*2* sizeof(int8_t))
     }
-    /* 4mv b frame decoding table */
-    //note this is needed for h263 without b frames too (segfault on damaged streams otherwise)
-    CHECKED_ALLOCZ(s->co_located_type_table, s->mb_num * sizeof(uint8_t))
     if (s->out_format == FMT_H263) {
         /* ac values */
         CHECKED_ALLOCZ(s->ac_val[0], yc_size * sizeof(int16_t) * 16);
@@ -410,8 +431,8 @@ int MPV_common_init(MpegEncContext *s)
         CHECKED_ALLOCZ(s->bitstream_buffer, BITSTREAM_BUFFER_SIZE);
 
         /* cbp, ac_pred, pred_dir */
-        CHECKED_ALLOCZ(s->cbp_table  , s->mb_num * sizeof(uint8_t))
-        CHECKED_ALLOCZ(s->pred_dir_table, s->mb_num * sizeof(uint8_t))
+        CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
+        CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
     }
     
     if (s->h263_pred || s->h263_plus || !s->encoding) {
@@ -425,14 +446,14 @@ int MPV_common_init(MpegEncContext *s)
     }
 
     /* which mb is a intra block */
-    CHECKED_ALLOCZ(s->mbintra_table, s->mb_num);
-    memset(s->mbintra_table, 1, s->mb_num);
+    CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
+    memset(s->mbintra_table, 1, mb_array_size);
     
     /* default structure is frame */
     s->picture_structure = PICT_FRAME;
     
     /* init macroblock skip table */
-    CHECKED_ALLOCZ(s->mbskip_table, s->mb_num+1);
+    CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
     //Note the +1 is for a quicker mpeg4 slice_end detection
     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
     
@@ -456,12 +477,19 @@ void MPV_common_end(MpegEncContext *s)
     int i;
 
     av_freep(&s->mb_type);
-    av_freep(&s->p_mv_table);
-    av_freep(&s->b_forw_mv_table);
-    av_freep(&s->b_back_mv_table);
-    av_freep(&s->b_bidir_forw_mv_table);
-    av_freep(&s->b_bidir_back_mv_table);
-    av_freep(&s->b_direct_mv_table);
+    av_freep(&s->p_mv_table_base);
+    av_freep(&s->b_forw_mv_table_base);
+    av_freep(&s->b_back_mv_table_base);
+    av_freep(&s->b_bidir_forw_mv_table_base);
+    av_freep(&s->b_bidir_back_mv_table_base);
+    av_freep(&s->b_direct_mv_table_base);
+    s->p_mv_table= NULL;
+    s->b_forw_mv_table= NULL;
+    s->b_back_mv_table= NULL;
+    s->b_bidir_forw_mv_table= NULL;
+    s->b_bidir_back_mv_table= NULL;
+    s->b_direct_mv_table= NULL;
+    
     av_freep(&s->motion_val);
     av_freep(&s->dc_val[0]);
     av_freep(&s->ac_val[0]);
@@ -479,16 +507,17 @@ void MPV_common_end(MpegEncContext *s)
     av_freep(&s->tex_pb_buffer);
     av_freep(&s->pb2_buffer);
     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
-    av_freep(&s->co_located_type_table);
     av_freep(&s->field_mv_table);
     av_freep(&s->field_select_table);
     av_freep(&s->avctx->stats_out);
     av_freep(&s->ac_stats);
     av_freep(&s->error_status_table);
+    av_freep(&s->mb_index2xy);
 
     for(i=0; i<MAX_PICTURE_COUNT; i++){
         free_picture(s, &s->picture[i]);
     }
+    avcodec_default_free_buffers(s->avctx);
     s->context_initialized = 0;
 }
 
@@ -504,7 +533,6 @@ int MPV_encode_init(AVCodecContext *avctx)
 
     s->bit_rate = avctx->bit_rate;
     s->bit_rate_tolerance = avctx->bit_rate_tolerance;
-    s->frame_rate = avctx->frame_rate;
     s->width = avctx->width;
     s->height = avctx->height;
     if(avctx->gop_size > 600){
@@ -516,8 +544,6 @@ int MPV_encode_init(AVCodecContext *avctx)
     s->rtp_payload_size = avctx->rtp_payload_size;
     if (avctx->rtp_callback)
         s->rtp_callback = avctx->rtp_callback;
-    s->qmin= avctx->qmin;
-    s->qmax= avctx->qmax;
     s->max_qdiff= avctx->max_qdiff;
     s->qcompress= avctx->qcompress;
     s->qblur= avctx->qblur;
@@ -554,10 +580,52 @@ int MPV_encode_init(AVCodecContext *avctx)
     
     s->progressive_sequence= !(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
 
+    if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4){
+        fprintf(stderr, "4MV not supporetd by codec\n");
+        return -1;
+    }
+    
+    if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
+        fprintf(stderr, "qpel not supporetd by codec\n");
+        return -1;
+    }
+
+    if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
+        fprintf(stderr, "data partitioning not supporetd by codec\n");
+        return -1;
+    }
+    
+    if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO){
+        fprintf(stderr, "b frames not supporetd by codec\n");
+        return -1;
+    }
+    
+    if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
+        fprintf(stderr, "mpeg2 style quantization not supporetd by codec\n");
+        return -1;
+    }
+        
+    if(s->codec_id==CODEC_ID_MJPEG){
+        s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
+        s->inter_quant_bias= 0;
+    }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO){
+        s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
+        s->inter_quant_bias= 0;
+    }else{
+        s->intra_quant_bias=0;
+        s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
+    }
+    
+    if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
+        s->intra_quant_bias= avctx->intra_quant_bias;
+    if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
+        s->inter_quant_bias= avctx->inter_quant_bias;
+    
     switch(avctx->codec->id) {
     case CODEC_ID_MPEG1VIDEO:
         s->out_format = FMT_MPEG1;
-        avctx->delay=0; //FIXME not sure, should check the spec
+        s->low_delay= 0; //s->max_b_frames ? 0 : 1;
+        avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
         break;
     case CODEC_ID_MJPEG:
         s->out_format = FMT_MJPEG;
@@ -634,6 +702,7 @@ int MPV_encode_init(AVCodecContext *avctx)
         s->h263_pred = 1;
         s->unrestricted_mv = 1;
         s->msmpeg4_version= 3;
+        s->flipflop_rounding=1;
         avctx->delay=0;
         s->low_delay=1;
         break;
@@ -643,6 +712,7 @@ int MPV_encode_init(AVCodecContext *avctx)
         s->h263_pred = 1;
         s->unrestricted_mv = 1;
         s->msmpeg4_version= 4;
+        s->flipflop_rounding=1;
         avctx->delay=0;
         s->low_delay=1;
         break;
@@ -652,6 +722,7 @@ int MPV_encode_init(AVCodecContext *avctx)
         s->h263_pred = 1;
         s->unrestricted_mv = 1;
         s->msmpeg4_version= 5;
+        s->flipflop_rounding=1;
         avctx->delay=0;
         s->low_delay=1;
         break;
@@ -666,8 +737,8 @@ int MPV_encode_init(AVCodecContext *avctx)
             int i;
             done=1;
 
-            default_mv_penalty= av_mallocz( sizeof(uint16_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
-            memset(default_mv_penalty, 0, sizeof(uint16_t)*(MAX_FCODE+1)*(2*MAX_MV+1));
+            default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
+            memset(default_mv_penalty, 0, sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1));
             memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));
 
             for(i=-16; i<16; i++){
@@ -838,7 +909,7 @@ static int find_unused_picture(MpegEncContext *s, int shared){
         }
     }else{
         for(i=0; i<MAX_PICTURE_COUNT; i++){
-            if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) break;
+            if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) break; //FIXME
         }
         for(i=0; i<MAX_PICTURE_COUNT; i++){
             if(s->picture[i].data[0]==NULL) break;
@@ -856,24 +927,18 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
     AVFrame *pic;
 
     s->mb_skiped = 0;
-    
+
+    assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
+
     /* mark&release old frames */
-    if (s->pict_type != B_TYPE && s->last_picture.data[0]) {
-        for(i=0; i<MAX_PICTURE_COUNT; i++){
-//printf("%8X %d %d %X %X\n", s->picture[i].data[0], s->picture[i].type, i, s->next_picture.data[0], s->last_picture.data[0]);
-            if(s->picture[i].data[0] == s->last_picture.data[0]){
-//                s->picture[i].reference=0;
-                avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
-                break;
-            }    
-        }
-        assert(i<MAX_PICTURE_COUNT);
+    if (s->pict_type != B_TYPE && s->last_picture_ptr) {
+        avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
 
         /* release forgotten pictures */
         /* if(mpeg124/h263) */
         if(!s->encoding){
             for(i=0; i<MAX_PICTURE_COUNT; i++){
-                if(s->picture[i].data[0] && s->picture[i].data[0] != s->next_picture.data[0] && s->picture[i].reference){
+                if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
                     fprintf(stderr, "releasing zombie picture\n");
                     avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);                
                 }
@@ -882,27 +947,60 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
     }
 alloc:
     if(!s->encoding){
+        /* release non refernce frames */
+        for(i=0; i<MAX_PICTURE_COUNT; i++){
+            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
+                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
+            }
+        }
+
         i= find_unused_picture(s, 0);
     
         pic= (AVFrame*)&s->picture[i];
-        pic->reference= s->pict_type != B_TYPE;
-        pic->coded_picture_number= s->current_picture.coded_picture_number+1;
+        pic->reference= s->pict_type != B_TYPE ? 3 : 0;
+
+        if(s->current_picture_ptr)
+            pic->coded_picture_number= s->current_picture_ptr->coded_picture_number+1;
         
         alloc_picture(s, (Picture*)pic, 0);
 
-        s->current_picture= s->picture[i];
+        s->current_picture_ptr= &s->picture[i];
     }
 
+    s->current_picture_ptr->pict_type= s->pict_type;
+    s->current_picture_ptr->quality= s->qscale;
+    s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
+
+    s->current_picture= *s->current_picture_ptr;
+  
+  if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
     if (s->pict_type != B_TYPE) {
-        s->last_picture= s->next_picture;
-        s->next_picture= s->current_picture;
+        s->last_picture_ptr= s->next_picture_ptr;
+        s->next_picture_ptr= s->current_picture_ptr;
     }
     
-    if(s->pict_type != I_TYPE && s->last_picture.data[0]==NULL){
+    if(s->last_picture_ptr) s->last_picture= *s->last_picture_ptr;
+    if(s->next_picture_ptr) s->next_picture= *s->next_picture_ptr;
+    if(s->new_picture_ptr ) s->new_picture = *s->new_picture_ptr;
+    
+    if(s->picture_structure!=PICT_FRAME){
+        int i;
+        for(i=0; i<4; i++){
+            if(s->picture_structure == PICT_BOTTOM_FIELD){
+                 s->current_picture.data[i] += s->current_picture.linesize[i];
+            } 
+            s->current_picture.linesize[i] *= 2;
+            s->last_picture.linesize[i] *=2;
+            s->next_picture.linesize[i] *=2;
+        }
+    }
+    
+    if(s->pict_type != I_TYPE && s->last_picture_ptr==NULL){
         fprintf(stderr, "warning: first frame is no keyframe\n");
         assert(s->pict_type != B_TYPE); //these should have been dropped if we dont have a reference
         goto alloc;
     }
+  }
    
     s->hurry_up= s->avctx->hurry_up;
     s->error_resilience= avctx->error_resilience;
@@ -938,12 +1036,8 @@ void MPV_frame_end(MpegEncContext *s)
     if(s->pict_type!=B_TYPE){
         s->last_non_b_pict_type= s->pict_type;
     }
-    
-    s->current_picture.quality= s->qscale; //FIXME get average of qscale_table
-    s->current_picture.pict_type= s->pict_type;
-    s->current_picture.key_frame= s->pict_type == I_TYPE;
-    
-    /* copy back current_picture variables */
+#if 0
+        /* copy back current_picture variables */
     for(i=0; i<MAX_PICTURE_COUNT; i++){
         if(s->picture[i].data[0] == s->current_picture.data[0]){
             s->picture[i]= s->current_picture;
@@ -951,23 +1045,97 @@ void MPV_frame_end(MpegEncContext *s)
         }    
     }
     assert(i<MAX_PICTURE_COUNT);
+#endif    
 
-    /* release non refernce frames */
-    for(i=0; i<MAX_PICTURE_COUNT; i++){
-        if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/)
-            s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
+    if(s->encoding){
+        /* release non refernce frames */
+        for(i=0; i<MAX_PICTURE_COUNT; i++){
+            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
+                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
+            }
+        }
     }
-    if(s->avctx->debug&FF_DEBUG_SKIP){
-        int x,y;        
+    // clear copies, to avoid confusion
+#if 0
+    memset(&s->last_picture, 0, sizeof(Picture));
+    memset(&s->next_picture, 0, sizeof(Picture));
+    memset(&s->current_picture, 0, sizeof(Picture));
+#endif
+}
+
+/**
+ * prints debuging info for the given picture.
+ */
+void ff_print_debug_info(MpegEncContext *s, Picture *pict){
+
+    if(!pict || !pict->mb_type) return;
+
+    if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
+        int x,y;
+
         for(y=0; y<s->mb_height; y++){
             for(x=0; x<s->mb_width; x++){
-                int count= s->mbskip_table[x + y*s->mb_width];
-                if(count>9) count=9;
-                printf(" %1d", count);
+                if(s->avctx->debug&FF_DEBUG_SKIP){
+                    int count= s->mbskip_table[x + y*s->mb_stride];
+                    if(count>9) count=9;
+                    printf("%1d", count);
+                }
+                if(s->avctx->debug&FF_DEBUG_QP){
+                    printf("%2d", pict->qscale_table[x + y*s->mb_stride]);
+                }
+                if(s->avctx->debug&FF_DEBUG_MB_TYPE){
+                    int mb_type= pict->mb_type[x + y*s->mb_stride];
+                    
+                    //Type & MV direction
+                    if(IS_PCM(mb_type))
+                        printf("P");
+                    else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
+                        printf("A");
+                    else if(IS_INTRA4x4(mb_type))
+                        printf("i");
+                    else if(IS_INTRA16x16(mb_type))
+                        printf("I");
+                    else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
+                        printf("d");
+                    else if(IS_DIRECT(mb_type))
+                        printf("D");
+                    else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
+                        printf("g");
+                    else if(IS_GMC(mb_type))
+                        printf("G");
+                    else if(IS_SKIP(mb_type))
+                        printf("S");
+                    else if(!USES_LIST(mb_type, 1))
+                        printf(">");
+                    else if(!USES_LIST(mb_type, 0))
+                        printf("<");
+                    else{
+                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
+                        printf("X");
+                    }
+                    
+                    //segmentation
+                    if(IS_8X8(mb_type))
+                        printf("+");
+                    else if(IS_16X8(mb_type))
+                        printf("-");
+                    else if(IS_8X16(mb_type))
+                        printf("¦");
+                    else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
+                        printf(" ");
+                    else
+                        printf("?");
+                    
+                        
+                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
+                        printf("=");
+                    else
+                        printf(" ");
+                }
+//                printf(" ");
             }
             printf("\n");
         }
-        printf("pict type: %d\n", s->pict_type);
     }
 }
 
@@ -1024,7 +1192,7 @@ static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
         i= find_unused_picture(s, 1);
 
         pic= (AVFrame*)&s->picture[i];
-        pic->reference= 1;
+        pic->reference= 3;
     
         for(i=0; i<4; i++){
             pic->data[i]= pic_arg->data[i];
@@ -1035,9 +1203,15 @@ static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
         i= find_unused_picture(s, 0);
 
         pic= (AVFrame*)&s->picture[i];
-        pic->reference= 1;
+        pic->reference= 3;
 
         alloc_picture(s, (Picture*)pic, 0);
+        for(i=0; i<4; i++){
+            /* the input will be 16 pixels to the right relative to the actual buffer start
+             * and the current_pic, so the buffer can be reused, yes its not beatifull 
+             */
+            pic->data[i]+= 16; 
+        }
 
         if(   pic->data[0] == pic_arg->data[0] 
            && pic->data[1] == pic_arg->data[1]
@@ -1100,7 +1274,7 @@ static void select_input_picture(MpegEncContext *s){
 
     /* set next picture types & ordering */
     if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
-        if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture.data[0]==NULL || s->intra_only){
+        if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
             s->reordered_input_picture[0]= s->input_picture[0];
             s->reordered_input_picture[0]->pict_type= I_TYPE;
             s->reordered_input_picture[0]->coded_picture_number= coded_pic_num;
@@ -1174,19 +1348,22 @@ static void select_input_picture(MpegEncContext *s){
     }
     
     if(s->reordered_input_picture[0]){
-       s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE;
+        s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
+
+        s->new_picture= *s->reordered_input_picture[0];
 
         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
+            // input is a shared pix, so we cant modifiy it -> alloc a new one & ensure that the shared one is reuseable
+        
             int i= find_unused_picture(s, 0);
             Picture *pic= &s->picture[i];
 
-            s->new_picture= *s->reordered_input_picture[0];
-
             /* mark us unused / free shared pic */
             for(i=0; i<4; i++)
                 s->reordered_input_picture[0]->data[i]= NULL;
             s->reordered_input_picture[0]->type= 0;
             
+            //FIXME bad, copy * except
             pic->pict_type = s->reordered_input_picture[0]->pict_type;
             pic->quality   = s->reordered_input_picture[0]->quality;
             pic->coded_picture_number = s->reordered_input_picture[0]->coded_picture_number;
@@ -1194,18 +1371,20 @@ static void select_input_picture(MpegEncContext *s){
             
             alloc_picture(s, pic, 0);
 
-            s->current_picture= *pic;
+            s->current_picture_ptr= pic;
         }else{
+            // input is not a shared pix -> reuse buffer for current_pix
+
             assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER 
                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
             
-            s->new_picture= *s->reordered_input_picture[0];
-
+            s->current_picture_ptr= s->reordered_input_picture[0];
             for(i=0; i<4; i++){
-                s->reordered_input_picture[0]->data[i]-=16; //FIXME dirty
+                //reverse the +16 we did before storing the input
+                s->current_picture_ptr->data[i]-=16;
             }
-            s->current_picture= *s->reordered_input_picture[0];
         }
+        s->current_picture= *s->current_picture_ptr;
     
         s->picture_number= s->new_picture.display_picture_number;
 //printf("dpn:%d\n", s->picture_number);
@@ -1260,6 +1439,10 @@ int MPV_encode_picture(AVCodecContext *avctx,
         
         if(s->flags&CODEC_FLAG_PASS1)
             ff_write_pass1_stats(s);
+
+        for(i=0; i<4; i++){
+            avctx->error[i] += s->current_picture_ptr->error[i];
+        }
     }
 
     s->input_picture_number++;
@@ -1269,10 +1452,6 @@ int MPV_encode_picture(AVCodecContext *avctx,
     
     s->total_bits += s->frame_bits;
     avctx->frame_bits  = s->frame_bits;
-
-    for(i=0; i<4; i++){
-        avctx->error[i] += s->current_picture.error[i];
-    }
     
     return pbBufPtr(&s->pb) - s->pb.buf;
 }
@@ -1523,15 +1702,15 @@ if(s->quarter_sample)
     src_y = clip(src_y, -16, height);
     if (src_y == height)
         dxy &= ~2;
-    linesize   = s->linesize << field_based;
-    uvlinesize = s->uvlinesize << field_based;
+    linesize   = s->current_picture.linesize[0] << field_based;
+    uvlinesize = s->current_picture.linesize[1] << field_based;
     ptr = ref_picture[0] + (src_y * linesize) + (src_x) + src_offset;
     dest_y += dest_offset;
 
     if(s->flags&CODEC_FLAG_EMU_EDGE){
         if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 16 > s->h_edge_pos
                               || src_y + (motion_y&1) + h  > v_edge_pos){
-            ff_emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based, 
+            ff_emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based,  //FIXME linesize? and uv below
                              src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
             ptr= s->edge_emu_buffer + src_offset;
             emu=1;
@@ -1690,6 +1869,18 @@ inline int ff_h263_round_chroma(int x){
     }
 }
 
+/**
+ * motion compesation of a single macroblock
+ * @param s context
+ * @param dest_y luma destination pointer
+ * @param dest_cb chroma cb/u destination pointer
+ * @param dest_cr chroma cr/v destination pointer
+ * @param dir direction (0->forward, 1->backward)
+ * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
+ * @param pic_op halfpel motion compensation function (average or put normally)
+ * @param pic_op qpel motion compensation function (average or put normally)
+ * the motion vectors are taken from s->mv and the MV type from s->mv_type
+ */
 static inline void MPV_motion(MpegEncContext *s, 
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int dir, uint8_t **ref_picture, 
@@ -1864,10 +2055,10 @@ static inline void MPV_motion(MpegEncContext *s,
         } else {
             int offset;
             if(s->picture_structure == s->field_select[dir][0] + 1 || s->pict_type == B_TYPE || s->first_field){
-                offset= s->field_select[dir][0] ? s->linesize/2 : 0;
+                offset= s->field_select[dir][0] ? s->linesize : 0;
             }else{
                 ref_picture= s->current_picture.data;
-                offset= s->field_select[dir][0] ? s->linesize/2 : -s->linesize/2;
+                offset= s->field_select[dir][0] ? s->linesize : -s->linesize;
             } 
 
             mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
@@ -1937,7 +2128,7 @@ void ff_clean_intra_table_entries(MpegEncContext *s)
     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
     
-    s->mbintra_table[s->mb_x + s->mb_y*s->mb_width]= 0;
+    s->mbintra_table[s->mb_x + s->mb_y*s->mb_stride]= 0;
 }
 
 /* generic function called after a macroblock has been parsed by the
@@ -1953,7 +2144,7 @@ void ff_clean_intra_table_entries(MpegEncContext *s)
 void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
 {
     int mb_x, mb_y;
-    const int mb_xy = s->mb_y * s->mb_width + s->mb_x;
+    const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
 
     mb_x = s->mb_x;
     mb_y = s->mb_y;
@@ -1979,33 +2170,26 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
         //FIXME a lot of thet is only needed for !low_delay
         const int wrap = s->block_wrap[0];
         const int xy = s->block_index[0];
-        const int mb_index= s->mb_x + s->mb_y*s->mb_width;
-        if(s->mv_type == MV_TYPE_8X8){
-            s->co_located_type_table[mb_index]= CO_LOCATED_TYPE_4MV;
-        } else {
+        if(s->mv_type != MV_TYPE_8X8){
             int motion_x, motion_y;
             if (s->mb_intra) {
                 motion_x = 0;
                 motion_y = 0;
-                if(s->co_located_type_table)
-                    s->co_located_type_table[mb_index]= 0;
             } else if (s->mv_type == MV_TYPE_16X16) {
                 motion_x = s->mv[0][0][0];
                 motion_y = s->mv[0][0][1];
-                if(s->co_located_type_table)
-                    s->co_located_type_table[mb_index]= 0;
             } else /*if (s->mv_type == MV_TYPE_FIELD)*/ {
                 int i;
                 motion_x = s->mv[0][0][0] + s->mv[0][1][0];
                 motion_y = s->mv[0][0][1] + s->mv[0][1][1];
                 motion_x = (motion_x>>1) | (motion_x&1);
                 for(i=0; i<2; i++){
-                    s->field_mv_table[mb_index][i][0]= s->mv[0][i][0];
-                    s->field_mv_table[mb_index][i][1]= s->mv[0][i][1];
-                    s->field_select_table[mb_index][i]= s->field_select[0][i];
+                    s->field_mv_table[mb_xy][i][0]= s->mv[0][i][0];
+                    s->field_mv_table[mb_xy][i][1]= s->mv[0][i][1];
+                    s->field_select_table[mb_xy][i]= s->field_select[0][i];
                 }
-                s->co_located_type_table[mb_index]= CO_LOCATED_TYPE_FIELDMV;
             }
+            
             /* no update if 8X8 because it has been done during parsing */
             s->motion_val[xy][0] = motion_x;
             s->motion_val[xy][1] = motion_y;
@@ -2016,6 +2200,13 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
             s->motion_val[xy + 1 + wrap][0] = motion_x;
             s->motion_val[xy + 1 + wrap][1] = motion_y;
         }
+
+        if(s->encoding){ //FIXME encoding MUST be cleaned up
+            if (s->mv_type == MV_TYPE_8X8) 
+                s->current_picture.mb_type[mb_xy]= MB_TYPE_L0 | MB_TYPE_8x8;
+            else
+                s->current_picture.mb_type[mb_xy]= MB_TYPE_L0 | MB_TYPE_16x16;
+        }
     }
     
     if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
@@ -2023,6 +2214,8 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
         int dct_linesize, dct_offset;
         op_pixels_func (*op_pix)[4];
         qpel_mc_func (*op_qpix)[16];
+        const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
+        const int uvlinesize= s->current_picture.linesize[1];
 
         /* avoid copy if macroblock skipped in last frame too */
         if (s->pict_type != B_TYPE) {
@@ -2061,17 +2254,17 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
             dest_cb = s->current_picture.data[1] + mb_x * 8;
             dest_cr = s->current_picture.data[2] + mb_x * 8;
         }else{
-            dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
-            dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
-            dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
+            dest_y  = s->current_picture.data[0] + (mb_y * 16* linesize  ) + mb_x * 16;
+            dest_cb = s->current_picture.data[1] + (mb_y * 8 * uvlinesize) + mb_x * 8;
+            dest_cr = s->current_picture.data[2] + (mb_y * 8 * uvlinesize) + mb_x * 8;
         }
 
         if (s->interlaced_dct) {
-            dct_linesize = s->linesize * 2;
-            dct_offset = s->linesize;
+            dct_linesize = linesize * 2;
+            dct_offset = linesize;
         } else {
-            dct_linesize = s->linesize;
-            dct_offset = s->linesize * 8;
+            dct_linesize = linesize;
+            dct_offset = linesize * 8;
         }
 
         if (!s->mb_intra) {
@@ -2108,8 +2301,8 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
                 add_dequant_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
 
                 if(!(s->flags&CODEC_FLAG_GRAY)){
-                    add_dequant_dct(s, block[4], 4, dest_cb, s->uvlinesize);
-                    add_dequant_dct(s, block[5], 5, dest_cr, s->uvlinesize);
+                    add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize);
+                    add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize);
                 }
             } else if(s->codec_id != CODEC_ID_WMV2){
                 add_dct(s, block[0], 0, dest_y, dct_linesize);
@@ -2118,8 +2311,8 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
                 add_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
 
                 if(!(s->flags&CODEC_FLAG_GRAY)){
-                    add_dct(s, block[4], 4, dest_cb, s->uvlinesize);
-                    add_dct(s, block[5], 5, dest_cr, s->uvlinesize);
+                    add_dct(s, block[4], 4, dest_cb, uvlinesize);
+                    add_dct(s, block[5], 5, dest_cr, uvlinesize);
                 }
             } 
 #ifdef CONFIG_RISKY
@@ -2136,8 +2329,8 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
                 put_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
 
                 if(!(s->flags&CODEC_FLAG_GRAY)){
-                    put_dct(s, block[4], 4, dest_cb, s->uvlinesize);
-                    put_dct(s, block[5], 5, dest_cr, s->uvlinesize);
+                    put_dct(s, block[4], 4, dest_cb, uvlinesize);
+                    put_dct(s, block[5], 5, dest_cr, uvlinesize);
                 }
             }else{
                 s->dsp.idct_put(dest_y                 , dct_linesize, block[0]);
@@ -2146,8 +2339,8 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
                 s->dsp.idct_put(dest_y + dct_offset + 8, dct_linesize, block[3]);
 
                 if(!(s->flags&CODEC_FLAG_GRAY)){
-                    s->dsp.idct_put(dest_cb, s->uvlinesize, block[4]);
-                    s->dsp.idct_put(dest_cr, s->uvlinesize, block[5]);
+                    s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
+                    s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
                 }
             }
         }
@@ -2300,7 +2493,7 @@ static int pix_diff_vcmp16x8(uint8_t *s1, uint8_t*s2, int stride){ //FIXME move
  */
 void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
     if (    s->avctx->draw_horiz_band 
-        && (s->last_picture.data[0] || s->low_delay) ) {
+        && (s->last_picture_ptr || s->low_delay) ) {
         uint8_t *src_ptr[3];
         int offset;
         h= FFMIN(h, s->height - y);
@@ -2339,7 +2532,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
     for(i=0; i<6; i++) skip_dct[i]=0;
     
     if(s->adaptive_quant){
-        s->dquant= s->current_picture.qscale_table[mb_x + mb_y*s->mb_width] - s->qscale;
+        s->dquant= s->current_picture.qscale_table[mb_x + mb_y*s->mb_stride] - s->qscale;
 
         if(s->out_format==FMT_H263){
             if     (s->dquant> 2) s->dquant= 2;
@@ -2348,10 +2541,10 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
             
         if(s->codec_id==CODEC_ID_MPEG4){        
             if(!s->mb_intra){
-                assert(s->dquant==0 || s->mv_type!=MV_TYPE_8X8);
-
                 if(s->mv_dir&MV_DIRECT)
                     s->dquant=0;
+
+                assert(s->dquant==0 || s->mv_type!=MV_TYPE_8X8);
             }
         }
         s->qscale+= s->dquant;
@@ -2490,7 +2683,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
         }
         /* pre quantization */         
-        if(s->current_picture.mc_mb_var[s->mb_width*mb_y+ mb_x]<2*s->qscale*s->qscale){
+        if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
             //FIXME optimize
            if(s->dsp.pix_abs8x8(ptr_y               , dest_y               , wrap_y) < 20*s->qscale) skip_dct[0]= 1;
             if(s->dsp.pix_abs8x8(ptr_y            + 8, dest_y            + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1;
@@ -2521,13 +2714,13 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
             {
                 float adap_parm;
                 
-                adap_parm = ((s->avg_mb_var << 1) + s->mb_var[s->mb_width*mb_y+mb_x] + 1.0) /
-                            ((s->mb_var[s->mb_width*mb_y+mb_x] << 1) + s->avg_mb_var + 1.0);
+                adap_parm = ((s->avg_mb_var << 1) + s->mb_var[s->mb_stride*mb_y+mb_x] + 1.0) /
+                            ((s->mb_var[s->mb_stride*mb_y+mb_x] << 1) + s->avg_mb_var + 1.0);
             
                 printf("\ntype=%c qscale=%2d adap=%0.2f dquant=%4.2f var=%4d avgvar=%4d", 
-                        (s->mb_type[s->mb_width*mb_y+mb_x] > 0) ? 'I' : 'P', 
+                        (s->mb_type[s->mb_stride*mb_y+mb_x] > 0) ? 'I' : 'P', 
                         s->qscale, adap_parm, s->qscale*adap_parm,
-                        s->mb_var[s->mb_width*mb_y+mb_x], s->avg_mb_var);
+                        s->mb_var[s->mb_stride*mb_y+mb_x], s->avg_mb_var);
             }
 #endif
     /* DCT & quantize */
@@ -2597,17 +2790,33 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
  */
 int ff_combine_frame( MpegEncContext *s, int next, uint8_t **buf, int *buf_size){
     ParseContext *pc= &s->parse_context;
-        
+
+#if 0
+    if(pc->overread){
+        printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
+        printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
+    }
+#endif
+
+    /* copy overread bytes from the last frame into the buffer */
+    for(; pc->overread>0; pc->overread--){
+        pc->buffer[pc->index++]= pc->buffer[pc->overread_index++];
+    }
+    
     pc->last_index= pc->index;
 
-    if(next==-1){
+    /* copy into buffer and return */
+    if(next == END_NOT_FOUND){
         pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, (*buf_size) + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
 
         memcpy(&pc->buffer[pc->index], *buf, *buf_size);
         pc->index += *buf_size;
         return -1;
     }
-
+    
+    pc->overread_index= pc->index + next;
+    
+    /* append to buffer */
     if(pc->index){
         pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, next + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
 
@@ -2617,6 +2826,19 @@ int ff_combine_frame( MpegEncContext *s, int next, uint8_t **buf, int *buf_size)
         *buf_size= pc->last_index + next;
     }
 
+    /* store overread bytes */
+    for(;next < 0; next++){
+        pc->state = (pc->state<<8) | pc->buffer[pc->last_index + next];
+        pc->overread++;
+    }
+
+#if 0
+    if(pc->overread){
+        printf("overread %d, state:%X next:%d index:%d o_index:%d\n", pc->overread, pc->state, next, pc->index, pc->overread_index);
+        printf("%X %X %X %X\n", (*buf)[0], (*buf)[1],(*buf)[2],(*buf)[3]);
+    }
+#endif
+
     return 0;
 }
 
@@ -2639,7 +2861,7 @@ static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext
     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
 
     /* mpeg1 */
-    d->mb_incr= s->mb_incr;
+    d->mb_skip_run= s->mb_skip_run;
     for(i=0; i<3; i++)
         d->last_dc[i]= s->last_dc[i];
     
@@ -2665,7 +2887,7 @@ static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *
     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
     
     /* mpeg1 */
-    d->mb_incr= s->mb_incr;
+    d->mb_skip_run= s->mb_skip_run;
     for(i=0; i<3; i++)
         d->last_dc[i]= s->last_dc[i];
     
@@ -2765,13 +2987,6 @@ static void encode_picture(MpegEncContext *s, int picture_number)
     }
 
     s->picture_number = picture_number;
-
-    s->block_wrap[0]=
-    s->block_wrap[1]=
-    s->block_wrap[2]=
-    s->block_wrap[3]= s->mb_width*2 + 2;
-    s->block_wrap[4]=
-    s->block_wrap[5]= s->mb_width + 2;
     
     /* Reset the average MB variance */
     s->current_picture.mb_var_sum = 0;
@@ -2789,8 +3004,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
     s->qscale= (int)(s->frame_qscale + 0.5); //FIXME qscale / ... stuff for ME ratedistoration
     
     if(s->pict_type==I_TYPE){
-        if(s->msmpeg4_version) s->no_rounding=1;
-        else                   s->no_rounding=0;
+        if(s->msmpeg4_version >= 3) s->no_rounding=1;
+        else                        s->no_rounding=0;
     }else if(s->pict_type!=B_TYPE){
         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
             s->no_rounding ^= 1;          
@@ -2840,8 +3055,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
         /* I-Frame */
         //FIXME do we need to zero them?
         memset(s->motion_val[0], 0, sizeof(int16_t)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2);
-        memset(s->p_mv_table   , 0, sizeof(int16_t)*(s->mb_width+2)*(s->mb_height+2)*2);
-        memset(s->mb_type      , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_width*s->mb_height);
+        memset(s->p_mv_table   , 0, sizeof(int16_t)*(s->mb_stride)*s->mb_height*2);
+        memset(s->mb_type      , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height);
         
         if(!s->fixed_qscale){
             /* finding spatial complexity for I-frame rate control */
@@ -2855,8 +3070,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
     
                    varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
 
-                    s->current_picture.mb_var [s->mb_width * mb_y + mb_x] = varc;
-                    s->current_picture.mb_mean[s->mb_width * mb_y + mb_x] = (sum+128)>>8;
+                    s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
+                    s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
                     s->current_picture.mb_var_sum    += varc;
                 }
             }
@@ -2866,7 +3081,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
 
     if(s->scene_change_score > 0 && s->pict_type == P_TYPE){
         s->pict_type= I_TYPE;
-        memset(s->mb_type   , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_width*s->mb_height);
+        memset(s->mb_type   , MB_TYPE_INTRA, sizeof(uint8_t)*s->mb_stride*s->mb_height);
 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
     }
 
@@ -2976,9 +3191,9 @@ static void encode_picture(MpegEncContext *s, int picture_number)
         /* note: quant matrix value (8) is implied here */
         s->last_dc[i] = 128;
         
-        s->current_picture.error[i] = 0;
+        s->current_picture_ptr->error[i] = 0;
     }
-    s->mb_incr = 1;
+    s->mb_skip_run = 0;
     s->last_mv[0][0][0] = 0;
     s->last_mv[0][0][1] = 0;
     s->last_mv[1][0][0] = 0;
@@ -2998,7 +3213,6 @@ static void encode_picture(MpegEncContext *s, int picture_number)
     s->resync_mb_y=0;
     s->first_slice_line = 1;
     s->ptr_lastgob = s->pb.buf;
-    s->ptr_last_mb_line = s->pb.buf;
     for(mb_y=0; mb_y < s->mb_height; mb_y++) {
         s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
         s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
@@ -3010,8 +3224,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
         s->block_index[4]= s->block_wrap[4]*(mb_y + 1)                    + s->block_wrap[0]*(s->mb_height*2 + 2);
         s->block_index[5]= s->block_wrap[4]*(mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2);
         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
-            int mb_type= s->mb_type[mb_y * s->mb_width + mb_x];
-            const int xy= (mb_y+1) * (s->mb_width+2) + mb_x + 1;
+            const int xy= mb_y*s->mb_stride + mb_x;
+            int mb_type= s->mb_type[xy];
 //            int d;
             int dmin=10000000;
 
@@ -3033,7 +3247,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                 is_gob_start=0;
                 
                 if(s->codec_id==CODEC_ID_MPEG4){
-                    if(current_packet_size + s->mb_line_avgsize/s->mb_width >= s->rtp_payload_size
+                    if(current_packet_size >= s->rtp_payload_size
                        && s->mb_y + s->mb_x>0){
 
                         if(s->partitioned_frame){
@@ -3050,8 +3264,15 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                         ff_mpeg4_clean_buffers(s);
                         is_gob_start=1;
                     }
+                }else if(s->codec_id==CODEC_ID_MPEG1VIDEO){
+                    if(   current_packet_size >= s->rtp_payload_size 
+                       && s->mb_y + s->mb_x>0 && s->mb_skip_run==0){
+                        ff_mpeg1_encode_slice_header(s);
+                        ff_mpeg1_clean_buffers(s);
+                        is_gob_start=1;
+                    }
                 }else{
-                    if(current_packet_size + s->mb_line_avgsize*s->gob_index >= s->rtp_payload_size
+                    if(current_packet_size >= s->rtp_payload_size
                        && s->mb_x==0 && s->mb_y>0 && s->mb_y%s->gob_index==0){
                        
                         h263_encode_gob_header(s, mb_y);                       
@@ -3157,7 +3378,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                                  &dmin, &next_block, 0, 0);
                     /* force cleaning of ac/dc pred stuff if needed ... */
                     if(s->h263_pred || s->h263_aic)
-                        s->mbintra_table[mb_x + mb_y*s->mb_width]=1;
+                        s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
                 }
                 copy_context_after_encode(s, &best_s, -1);
                 
@@ -3181,16 +3402,16 @@ static void encode_picture(MpegEncContext *s, int picture_number)
             } else {
                 int motion_x, motion_y;
                 int intra_score;
-                int inter_score= s->current_picture.mb_cmp_score[mb_x + mb_y*s->mb_width];
+                int inter_score= s->current_picture.mb_cmp_score[mb_x + mb_y*s->mb_stride];
                 
               if(!(s->flags&CODEC_FLAG_HQ) && s->pict_type==P_TYPE){
                 /* get luma score */
                 if((s->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
-                    intra_score= (s->current_picture.mb_var[mb_x + mb_y*s->mb_width]<<8) - 500; //FIXME dont scale it down so we dont have to fix it
+                    intra_score= (s->current_picture.mb_var[mb_x + mb_y*s->mb_stride]<<8) - 500; //FIXME dont scale it down so we dont have to fix it
                 }else{
                     uint8_t *dest_y;
 
-                    int mean= s->current_picture.mb_mean[mb_x + mb_y*s->mb_width]; //FIXME
+                    int mean= s->current_picture.mb_mean[mb_x + mb_y*s->mb_stride]; //FIXME
                     mean*= 0x01010101;
                     
                     dest_y  = s->new_picture.data[0] + (mb_y * 16 * s->linesize    ) + mb_x * 16;
@@ -3206,8 +3427,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                     intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, dest_y, s->linesize);
                                         
 /*                    printf("intra:%7d inter:%7d var:%7d mc_var.%7d\n", intra_score>>8, inter_score>>8, 
-                        s->current_picture.mb_var[mb_x + mb_y*s->mb_width],
-                        s->current_picture.mc_mb_var[mb_x + mb_y*s->mb_width]);*/
+                        s->current_picture.mb_var[mb_x + mb_y*s->mb_stride],
+                        s->current_picture.mc_mb_var[mb_x + mb_y*s->mb_stride]);*/
                 }
                 
                 /* get chroma score */
@@ -3348,34 +3569,23 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
 
-                s->current_picture.error[0] += sse(
+                s->current_picture_ptr->error[0] += sse(
                     s,
                     s->new_picture    .data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
                     s->current_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
                     w, h, s->linesize);
-                s->current_picture.error[1] += sse(
+                s->current_picture_ptr->error[1] += sse(
                     s,
                     s->new_picture    .data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
                     s->current_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
                     w>>1, h>>1, s->uvlinesize);
-                s->current_picture.error[2] += sse(
+                s->current_picture_ptr->error[2] += sse(
                     s,
                     s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
                     s->current_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
                     w>>1, h>>1, s->uvlinesize);
             }
-//printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_width, get_bit_count(&s->pb));
-        }
-
-
-        /* Obtain average mb_row size for RTP */
-        if (s->rtp_mode) {
-            if (mb_y==0)
-                s->mb_line_avgsize = pbBufPtr(&s->pb) - s->ptr_last_mb_line;
-            else {    
-                s->mb_line_avgsize = (s->mb_line_avgsize + pbBufPtr(&s->pb) - s->ptr_last_mb_line) >> 1;
-            }
-            s->ptr_last_mb_line = pbBufPtr(&s->pb);
+//printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, get_bit_count(&s->pb));
         }
     }
     emms_c();
@@ -3698,8 +3908,8 @@ static int dct_quantize_c(MpegEncContext *s,
         level = block[j];
         level = level * qmat[j];
 
-//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
-//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
+//        if(   bias+level >= (1<<QMAT_SHIFT)
+//           || bias-level >= (1<<QMAT_SHIFT)){
         if(((unsigned)(level+threshold1))>threshold2){
             if(level>0){
                 level= (bias + level)>>QMAT_SHIFT;
@@ -3901,6 +4111,8 @@ char ff_get_pict_type_char(int pict_type){
     case P_TYPE: return 'P'; 
     case B_TYPE: return 'B'; 
     case S_TYPE: return 'S'; 
+    case SI_TYPE:return 'i'; 
+    case SP_TYPE:return 'p'; 
     default:     return '?';
     }
 }
@@ -3920,6 +4132,58 @@ static const AVOption mpeg4_options[] =
                       rc_min_rate, 4, 24000000, 0),
     AVOPTION_CODEC_INT("rc_maxrate", "rate control maximum bitrate",
                       rc_max_rate, 4, 24000000, 0),
+    AVOPTION_CODEC_DOUBLE("rc_buf_aggresivity", "rate control buffer aggresivity",
+                         rc_buffer_aggressivity, 4, 24000000, 0),
+    AVOPTION_CODEC_DOUBLE("rc_initial_cplx", "initial complexity for pass1 ratecontrol",
+                         rc_initial_cplx, 0., 9999999., 0),
+    AVOPTION_CODEC_DOUBLE("i_quant_factor", "qscale factor between p and i frames",
+                         i_quant_factor, 0., 0., 0),
+    AVOPTION_CODEC_DOUBLE("i_quant_offset", "qscale offset between p and i frames",
+                         i_quant_factor, -999999., 999999., 0),
+    AVOPTION_CODEC_INT("dct_algo", "dct alghorithm",
+                      dct_algo, 0, 5, 0), // fixme - "Auto,FastInt,Int,MMX,MLib,Altivec"
+    AVOPTION_CODEC_DOUBLE("lumi_masking", "luminance masking",
+                         lumi_masking, 0., 999999., 0),
+    AVOPTION_CODEC_DOUBLE("temporal_cplx_masking", "temporary complexity masking",
+                         temporal_cplx_masking, 0., 999999., 0),
+    AVOPTION_CODEC_DOUBLE("spatial_cplx_masking", "spatial complexity masking",
+                         spatial_cplx_masking, 0., 999999., 0),
+    AVOPTION_CODEC_DOUBLE("p_masking", "p block masking",
+                         p_masking, 0., 999999., 0),
+    AVOPTION_CODEC_DOUBLE("dark_masking", "darkness masking",
+                         dark_masking, 0., 999999., 0),
+    AVOPTION_CODEC_INT("idct_algo", "idct alghorithm",
+                      idct_algo, 0, 8, 0), // fixme - "Auto,Int,Simple,SimpleMMX,LibMPEG2MMX,PS2,MLib,ARM,Altivec"
+
+    AVOPTION_CODEC_INT("mb_qmin", "minimum MB quantizer",
+                      mb_qmin, 0, 8, 0),
+    AVOPTION_CODEC_INT("mb_qmax", "maximum MB quantizer",
+                      mb_qmin, 0, 8, 0),
+
+    AVOPTION_CODEC_INT("me_cmp", "ME compare function",
+                      me_cmp, 0, 24000000, 0),
+    AVOPTION_CODEC_INT("me_sub_cmp", "subpixel ME compare function",
+                      me_sub_cmp, 0, 24000000, 0),
+
+
+    AVOPTION_CODEC_INT("dia_size", "ME diamond size & shape",
+                      dia_size, 0, 24000000, 0),
+    AVOPTION_CODEC_INT("last_predictor_count", "amount of previous MV predictors",
+                      last_predictor_count, 0, 24000000, 0),
+
+    AVOPTION_CODEC_INT("pre_me", "pre pass for ME",
+                      pre_me, 0, 24000000, 0),
+    AVOPTION_CODEC_INT("me_pre_cmp", "ME pre pass compare function",
+                      me_pre_cmp, 0, 24000000, 0),
+
+    AVOPTION_CODEC_INT("me_range", "maximum ME search range",
+                      me_range, 0, 24000000, 0),
+    AVOPTION_CODEC_INT("pre_dia_size", "ME pre pass diamod size & shape",
+                      pre_dia_size, 0, 24000000, 0),
+    AVOPTION_CODEC_INT("me_subpel_quality", "subpel ME quality",
+                      me_subpel_quality, 0, 24000000, 0),
+    AVOPTION_CODEC_INT("me_range", "maximum ME search range",
+                      me_range, 0, 24000000, 0),
     AVOPTION_CODEC_FLAG("psnr", "calculate PSNR of compressed frames",
                        flags, CODEC_FLAG_PSNR, 0),
     AVOPTION_CODEC_RCOVERRIDE("rc_override", "ratecontrol override (=startframe,endframe,qscale,quality_factor)",
@@ -3991,6 +4255,7 @@ AVCodec msmpeg4v1_encoder = {
     MPV_encode_init,
     MPV_encode_picture,
     MPV_encode_end,
+    .options = mpeg4_options,
 };
 
 AVCodec msmpeg4v2_encoder = {
@@ -4001,6 +4266,7 @@ AVCodec msmpeg4v2_encoder = {
     MPV_encode_init,
     MPV_encode_picture,
     MPV_encode_end,
+    .options = mpeg4_options,
 };
 
 AVCodec msmpeg4v3_encoder = {
@@ -4011,6 +4277,7 @@ AVCodec msmpeg4v3_encoder = {
     MPV_encode_init,
     MPV_encode_picture,
     MPV_encode_end,
+    .options = mpeg4_options,
 };
 
 AVCodec wmv1_encoder = {
@@ -4021,6 +4288,7 @@ AVCodec wmv1_encoder = {
     MPV_encode_init,
     MPV_encode_picture,
     MPV_encode_end,
+    .options = mpeg4_options,
 };
 
 #endif