]> git.sesse.net Git - ffmpeg/blobdiff - libavcodec/h263.c
h263_dc_scale() cleanup
[ffmpeg] / libavcodec / h263.c
index d68240bc72cdb1e244e711035a356141485889da..26ada366238e8dceeaf5772175be670276fb6109 100644 (file)
@@ -1,23 +1,29 @@
 /*
  * H263/MPEG4 backend for ffmpeg encoder and decoder
- * Copyright (c) 2000,2001 Gerard Lantau.
+ * Copyright (c) 2000,2001 Fabrice Bellard.
  * H263+ support.
  * Copyright (c) 2001 Juan J. Sierralta P.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
  *
- * This program is distributed in the hope that it will be useful,
+ * This library is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
  *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * ac prediction encoding, b-frame support, error resilience, optimizations,
+ * qpel decoding, gmc decoding, interlaced decoding, 
+ * by Michael Niedermayer <michaelni@gmx.at>
  */
+//#define DEBUG
 #include "common.h"
 #include "dsputil.h"
 #include "avcodec.h"
 #include "h263data.h"
 #include "mpeg4data.h"
 
+//#undef NDEBUG
+//#include <assert.h>
+
 //rounded division & shift
-#define RDIV(a,b) ((a) > 0 ? ((a)+((b)>>1))/(b) : ((a)-((b)>>1))/(b))
 #define RSHIFT(a,b) ((a) > 0 ? ((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b))
-#define ABS(a) (((a)>=0)?(a):(-(a)))
+
+#if 1
+#define PRINT_MB_TYPE(a) {}
+#else
+#define PRINT_MB_TYPE(a) printf(a)
+#endif
+
+#define INTRA_MCBPC_VLC_BITS 6
+#define INTER_MCBPC_VLC_BITS 6
+#define CBPY_VLC_BITS 6
+#define MV_VLC_BITS 9
+#define DC_VLC_BITS 9
+#define SPRITE_TRAJ_VLC_BITS 6
+#define MB_TYPE_B_VLC_BITS 4
+#define TEX_VLC_BITS 9
 
 static void h263_encode_block(MpegEncContext * s, DCTELEM * block,
                              int n);
-static void h263_encode_motion(MpegEncContext * s, int val);
+static void h263_encode_motion(MpegEncContext * s, int val, int fcode);
 static void h263p_encode_umotion(MpegEncContext * s, int val);
-static void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block,
-                              int n, int dc, UINT8 *scan_table);
+static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block,
+                              int n, int dc, UINT8 *scan_table, 
+                               PutBitContext *dc_pb, PutBitContext *ac_pb);
 static int h263_decode_motion(MpegEncContext * s, int pred, int fcode);
 static int h263p_decode_umotion(MpegEncContext * s, int pred);
 static int h263_decode_block(MpegEncContext * s, DCTELEM * block,
                              int n, int coded);
-static int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
-                              int n, int coded);
-static inline int mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *dir_ptr);
+static inline int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr);
+static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
+                              int n, int coded, int intra);
+static int h263_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr);
 static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n,
                               int dir);
+static void mpeg4_decode_sprite_trajectory(MpegEncContext * s);
 
 extern UINT32 inverse[256];
 
+static UINT16 mv_penalty[MAX_FCODE+1][MAX_MV*2+1];
+static UINT8 fcode_tab[MAX_MV*2+1];
+static UINT8 umv_fcode_tab[MAX_MV*2+1];
+
+static UINT16 uni_DCtab_lum  [512][2];
+static UINT16 uni_DCtab_chrom[512][2];
+static UINT32 uni_mpeg4_intra_rl_bits[64*64*2*2];
+static UINT8  uni_mpeg4_intra_rl_len [64*64*2*2];
+static UINT32 uni_mpeg4_inter_rl_bits[64*64*2*2];
+static UINT8  uni_mpeg4_inter_rl_len [64*64*2*2];
+#define UNI_MPEG4_ENC_INDEX(last,run,level) ((last)*128 + (run)*256 + (level))
+//#define UNI_MPEG4_ENC_INDEX(last,run,level) ((last)*128*64 + (run) + (level)*64)
+
+/* mpeg4
+inter
+max level: 24/6
+max run: 53/63
+
+intra
+max level: 53/16
+max run: 29/41
+*/
+
 int h263_get_picture_format(int width, int height)
 {
     int format;
 
     if (width == 128 && height == 96)
-       format = 1;
+        format = 1;
     else if (width == 176 && height == 144)
-       format = 2;
+        format = 2;
     else if (width == 352 && height == 288)
-       format = 3;
+        format = 3;
     else if (width == 704 && height == 576)
-       format = 4;
+        format = 4;
     else if (width == 1408 && height == 1152)
-       format = 5;
+        format = 5;
     else
         format = 7;
     return format;
@@ -116,7 +164,7 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
         put_bits(&s->pb, 1, s->umvplus); /* Unrestricted Motion Vector */
         put_bits(&s->pb,1,0); /* SAC: off */
         put_bits(&s->pb,1,0); /* Advanced Prediction Mode: off */
-        put_bits(&s->pb,1,0); /* Advanced Intra Coding: off */
+        put_bits(&s->pb,1,s->h263_aic); /* Advanced Intra Coding */
         put_bits(&s->pb,1,0); /* Deblocking Filter: off */
         put_bits(&s->pb,1,0); /* Slice Structured: off */
         put_bits(&s->pb,1,0); /* Reference Picture Selection: off */
@@ -130,7 +178,11 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
                
         put_bits(&s->pb,1,0); /* Reference Picture Resampling: off */
         put_bits(&s->pb,1,0); /* Reduced-Resolution Update: off */
-        put_bits(&s->pb,1,0); /* Rounding Type */
+        if (s->pict_type == I_TYPE)
+            s->no_rounding = 0;
+        else
+            s->no_rounding ^= 1;
+        put_bits(&s->pb,1,s->no_rounding); /* Rounding Type */
         put_bits(&s->pb,2,0); /* Reserved */
         put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */
                
@@ -140,10 +192,18 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
                if (format == 7) {
             /* Custom Picture Format (CPFMT) */
                
-            put_bits(&s->pb,4,2); /* Aspect ratio: CIF 12:11 (4:3) picture */
+           if (s->aspect_ratio_info)
+               put_bits(&s->pb,4,s->aspect_ratio_info);
+           else
+               put_bits(&s->pb,4,2); /* Aspect ratio: CIF 12:11 (4:3) picture */
             put_bits(&s->pb,9,(s->width >> 2) - 1);
             put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */
             put_bits(&s->pb,9,(s->height >> 2));
+           if (s->aspect_ratio_info == FF_ASPECT_EXTENDED)
+           {
+               put_bits(&s->pb, 8, s->aspected_width);
+               put_bits(&s->pb, 8, s->aspected_height);
+           }
         }
         
         /* Unlimited Unrestricted Motion Vectors Indicator (UUI) */
@@ -153,6 +213,14 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
     }
 
     put_bits(&s->pb, 1, 0);    /* no PEI */
+
+    if(s->h263_aic){
+         s->y_dc_scale_table= 
+         s->c_dc_scale_table= h263_aic_dc_scale_table;
+    }else{
+        s->y_dc_scale_table=
+        s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
+    }
 }
 
 int h263_encode_gob_header(MpegEncContext * s, int mb_line)
@@ -213,22 +281,50 @@ static inline int decide_ac_pred(MpegEncContext * s, DCTELEM block[6][64], int d
         ac_val = s->ac_val[0][0] + s->block_index[n] * 16;
         ac_val1= ac_val;
         if(dir[n]){
+            const int xy= s->mb_x + s->mb_y*s->mb_width - s->mb_width;
+            /* top prediction */
             ac_val-= s->block_wrap[n]*16;
-            for(i=1; i<8; i++){
-                const int level= block[n][block_permute_op(i   )];
-                score0+= ABS(level);
-                score1+= ABS(level - ac_val[i+8]);
-                ac_val1[i  ]=    block[n][block_permute_op(i<<3)];
-                ac_val1[i+8]= level;
+            if(s->mb_y==0 || s->qscale == s->qscale_table[xy] || n==2 || n==3){
+                /* same qscale */
+                for(i=1; i<8; i++){
+                    const int level= block[n][block_permute_op(i   )];
+                    score0+= ABS(level);
+                    score1+= ABS(level - ac_val[i+8]);
+                    ac_val1[i  ]=    block[n][block_permute_op(i<<3)];
+                    ac_val1[i+8]= level;
+                }
+            }else{
+                /* different qscale, we must rescale */
+                for(i=1; i<8; i++){
+                    const int level= block[n][block_permute_op(i   )];
+                    score0+= ABS(level);
+                    score1+= ABS(level - ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale));
+                    ac_val1[i  ]=    block[n][block_permute_op(i<<3)];
+                    ac_val1[i+8]= level;
+                }
             }
         }else{
+            const int xy= s->mb_x-1 + s->mb_y*s->mb_width;
+            /* left prediction */
             ac_val-= 16;
-            for(i=1; i<8; i++){
-                const int level= block[n][block_permute_op(i<<3)];
-                score0+= ABS(level);
-                score1+= ABS(level - ac_val[i]);
-                ac_val1[i  ]= level;
-                ac_val1[i+8]=    block[n][block_permute_op(i   )];
+            if(s->mb_x==0 || s->qscale == s->qscale_table[xy] || n==1 || n==3){
+                /* same qscale */
+                for(i=1; i<8; i++){
+                    const int level= block[n][block_permute_op(i<<3)];
+                    score0+= ABS(level);
+                    score1+= ABS(level - ac_val[i]);
+                    ac_val1[i  ]= level;
+                    ac_val1[i+8]=    block[n][block_permute_op(i   )];
+                }
+            }else{
+                /* different qscale, we must rescale */
+                for(i=1; i<8; i++){
+                    const int level= block[n][block_permute_op(i<<3)];
+                    score0+= ABS(level);
+                    score1+= ABS(level - ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale));
+                    ac_val1[i  ]= level;
+                    ac_val1[i+8]=    block[n][block_permute_op(i   )];
+                }
             }
         }
     }
@@ -236,45 +332,323 @@ static inline int decide_ac_pred(MpegEncContext * s, DCTELEM block[6][64], int d
     return score0 > score1 ? 1 : 0;    
 }
 
+void ff_clean_h263_qscales(MpegEncContext *s){ // limits, in place, how far an entry of s->qscale_table may exceed its neighbours
+    int i;
+    
+    for(i=1; i<s->mb_num; i++){ // forward pass over entries 1..mb_num-1
+        if(s->qscale_table[i] - s->qscale_table[i-1] >2) // more than 2 above the previous entry
+            s->qscale_table[i]= s->qscale_table[i-1]+2; // lower it to previous+2
+    }
+    for(i=s->mb_num-2; i>=0; i--){ // backward pass over entries mb_num-2..0
+        if(s->qscale_table[i] - s->qscale_table[i+1] >2) // more than 2 above the following entry
+            s->qscale_table[i]= s->qscale_table[i+1]+2; // lower it to next+2; entries are only ever lowered, never raised
+    }
+}
+
+void ff_clean_mpeg4_qscales(MpegEncContext *s){ // adjusts s->qscale_table and s->mb_type in place after adaptive quantization
+    int i;
+    
+    ff_clean_h263_qscales(s); // first cap how far each entry may exceed its neighbours (+2)
+    
+    for(i=1; i<s->mb_num; i++){
+        if(s->qscale_table[i] != s->qscale_table[i-1] && (s->mb_type[i]&MB_TYPE_INTER4V)){ // qscale changes on a MB flagged INTER4V
+            s->mb_type[i]&= ~MB_TYPE_INTER4V; // clear the INTER4V flag ...
+            s->mb_type[i]|= MB_TYPE_INTER; // ... and set INTER instead; the non-16x16 path of mpeg4_encode_mb writes no dquant bits
+        }
+    }
+
+    if(s->pict_type== B_TYPE){ // extra constraints applied only to B pictures
+        int odd=0;
+        /* ok, come on, this isn't funny anymore, there's more code for handling this mpeg4 mess than
+           for the actual adaptive quantization */
+        
+        for(i=0; i<s->mb_num; i++){
+            odd += s->qscale_table[i]&1; // count entries with an odd qscale
+        }
+        
+        if(2*odd > s->mb_num) odd=1; // strict majority is odd: target parity is odd
+        else                  odd=0; // otherwise (including a tie) target parity is even
+        
+        for(i=0; i<s->mb_num; i++){
+            if((s->qscale_table[i]&1) != odd)
+                s->qscale_table[i]++; // bump to the target parity; presumably so all differences are even, cf. assert((s->dquant&1)==0) in mpeg4_encode_mb
+            if(s->qscale_table[i] > 31)
+                s->qscale_table[i]= 31; // NOTE(review): 31 with even target becomes 32 and is clamped back to 31, i.e. stays odd
+        }            
+    
+        for(i=1; i<s->mb_num; i++){
+            if(s->qscale_table[i] != s->qscale_table[i-1] && (s->mb_type[i]&MB_TYPE_DIRECT)){ // qscale changes on a MB flagged DIRECT
+                s->mb_type[i]&= ~MB_TYPE_DIRECT; // clear the DIRECT flag ...
+                s->mb_type[i]|= MB_TYPE_BIDIR; // ... and set BIDIR; mpeg4_encode_mb writes dquant only when mb_type (0 = direct) is non-zero
+            }
+        }
+    }
+}
+
 void mpeg4_encode_mb(MpegEncContext * s,
                    DCTELEM block[6][64],
                    int motion_x, int motion_y)
 {
-    int cbpc, cbpy, i, cbp, pred_x, pred_y;
+    int cbpc, cbpy, i, pred_x, pred_y;
+    int bits;
+    PutBitContext * const pb2    = s->data_partitioning                         ? &s->pb2    : &s->pb;
+    PutBitContext * const tex_pb = s->data_partitioning && s->pict_type!=B_TYPE ? &s->tex_pb : &s->pb;
+    PutBitContext * const dc_pb  = s->data_partitioning && s->pict_type!=I_TYPE ? &s->pb2    : &s->pb;
+    const int interleaved_stats= (s->flags&CODEC_FLAG_PASS1) && !s->data_partitioning ? 1 : 0;
+    const int dquant_code[5]= {1,0,9,2,3};
     
     //    printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
     if (!s->mb_intra) {
         /* compute cbp */
-        cbp = 0;
+        int cbp = 0;
         for (i = 0; i < 6; i++) {
-        if (s->block_last_index[i] >= 0)
-            cbp |= 1 << (5 - i);
-        }
-        if ((cbp | motion_x | motion_y) == 0) {
-            /* skip macroblock */
-            put_bits(&s->pb, 1, 1);
-            return;
+            if (s->block_last_index[i] >= 0)
+                cbp |= 1 << (5 - i);
         }
-        put_bits(&s->pb, 1, 0);        /* mb coded */
-        cbpc = cbp & 3;
-        put_bits(&s->pb,
-                inter_MCBPC_bits[cbpc],
-                inter_MCBPC_code[cbpc]);
-        cbpy = cbp >> 2;
-        cbpy ^= 0xf;
-        put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
 
-        /* motion vectors: 16x16 mode only now */
-        h263_pred_motion(s, 0, &pred_x, &pred_y);
-      
-        h263_encode_motion(s, motion_x - pred_x);
-        h263_encode_motion(s, motion_y - pred_y);
+        if(s->pict_type==B_TYPE){
+            static const int mb_type_table[8]= {-1, 2, 3, 1,-1,-1,-1, 0}; /* convert from mv_dir to type */
+            int mb_type=  mb_type_table[s->mv_dir];
+            
+            if(s->mb_x==0){
+                s->last_mv[0][0][0]= 
+                s->last_mv[0][0][1]= 
+                s->last_mv[1][0][0]= 
+                s->last_mv[1][0][1]= 0;
+            }
+            
+            assert(s->dquant>=-2 && s->dquant<=2);
+            assert((s->dquant&1)==0);
+            assert(mb_type>=0);
+
+            /* nothing to do if this MB was skipped in the next P Frame */
+            if(s->mbskip_table[s->mb_y * s->mb_width + s->mb_x]){ //FIXME avoid DCT & ...
+                s->skip_count++;
+                s->mv[0][0][0]= 
+                s->mv[0][0][1]= 
+                s->mv[1][0][0]= 
+                s->mv[1][0][1]= 0;
+                s->mv_dir= MV_DIR_FORWARD; //doesnt matter
+                s->qscale -= s->dquant;
+                return;
+            }
 
-        /* encode each block */
-        for (i = 0; i < 6; i++) {
-            mpeg4_encode_block(s, block[i], i, 0, zigzag_direct);
+            if ((cbp | motion_x | motion_y | mb_type) ==0) {
+                /* direct MB with MV={0,0} */
+                assert(s->dquant==0);
+                
+                put_bits(&s->pb, 1, 1); /* mb not coded modb1=1 */
+
+                if(interleaved_stats){
+                    s->misc_bits++;
+                    s->last_bits++;
+                }
+                s->skip_count++;
+                return;
+            }
+            put_bits(&s->pb, 1, 0);    /* mb coded modb1=0 */
+            put_bits(&s->pb, 1, cbp ? 0 : 1); /* modb2 */ //FIXME merge
+            put_bits(&s->pb, mb_type+1, 1); // this table is so simple that we dont need it :)
+            if(cbp) put_bits(&s->pb, 6, cbp);
+            
+            if(cbp && mb_type){
+                if(s->dquant)
+                    put_bits(&s->pb, 2, (s->dquant>>2)+3);
+                else
+                    put_bits(&s->pb, 1, 0);
+            }else
+                s->qscale -= s->dquant;
+            
+            if(!s->progressive_sequence){
+                if(cbp)
+                    put_bits(&s->pb, 1, s->interlaced_dct);
+                if(mb_type) // not direct mode
+                    put_bits(&s->pb, 1, 0); // no interlaced ME yet
+            }
+
+            if(interleaved_stats){
+                bits= get_bit_count(&s->pb);
+                s->misc_bits+= bits - s->last_bits;
+                s->last_bits=bits;
+            }
+
+            switch(mb_type)
+            {
+            case 0: /* direct */
+                h263_encode_motion(s, motion_x, 1);
+                h263_encode_motion(s, motion_y, 1);                
+                s->b_count++;
+                s->f_count++;
+                break;
+            case 1: /* bidir */
+                h263_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code);
+                h263_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code);
+                h263_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code);
+                h263_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code);
+                s->last_mv[0][0][0]= s->mv[0][0][0];
+                s->last_mv[0][0][1]= s->mv[0][0][1];
+                s->last_mv[1][0][0]= s->mv[1][0][0];
+                s->last_mv[1][0][1]= s->mv[1][0][1];
+                s->b_count++;
+                s->f_count++;
+                break;
+            case 2: /* backward */
+                h263_encode_motion(s, motion_x - s->last_mv[1][0][0], s->b_code);
+                h263_encode_motion(s, motion_y - s->last_mv[1][0][1], s->b_code);
+                s->last_mv[1][0][0]= motion_x;
+                s->last_mv[1][0][1]= motion_y;
+                s->b_count++;
+                break;
+            case 3: /* forward */
+                h263_encode_motion(s, motion_x - s->last_mv[0][0][0], s->f_code);
+                h263_encode_motion(s, motion_y - s->last_mv[0][0][1], s->f_code);
+                s->last_mv[0][0][0]= motion_x;
+                s->last_mv[0][0][1]= motion_y;
+                s->f_count++;
+                break;
+            default:
+                printf("unknown mb type\n");
+                return;
+            }
+
+            if(interleaved_stats){
+                bits= get_bit_count(&s->pb);
+                s->mv_bits+= bits - s->last_bits;
+                s->last_bits=bits;
+            }
+
+            /* encode each block */
+            for (i = 0; i < 6; i++) {
+                mpeg4_encode_block(s, block[i], i, 0, zigzag_direct, NULL, &s->pb);
+            }
+
+            if(interleaved_stats){
+                bits= get_bit_count(&s->pb);
+                s->p_tex_bits+= bits - s->last_bits;
+                s->last_bits=bits;
+            }
+        }else{ /* s->pict_type!=B_TYPE */
+            if ((cbp | motion_x | motion_y | s->dquant) == 0 && s->mv_type==MV_TYPE_16X16) {
+                /* check if the B frames can skip it too, as we must skip it if we skip here 
+                   why didnt they just compress the skip-mb bits instead of reusing them ?! */
+                if(s->max_b_frames>0){
+                    int i;
+                    int x,y, offset;
+                    uint8_t *p_pic;
+
+                    x= s->mb_x*16;
+                    y= s->mb_y*16;
+                    if(x+16 > s->width)  x= s->width-16;
+                    if(y+16 > s->height) y= s->height-16;
+
+                    offset= x + y*s->linesize;
+                    p_pic= s->new_picture[0] + offset;
+                    
+                    s->mb_skiped=1;
+                    for(i=0; i<s->max_b_frames; i++){
+                        uint8_t *b_pic;
+                        int diff;
+
+                        if(s->coded_order[i+1].pict_type!=B_TYPE) break;
+
+                        b_pic= s->coded_order[i+1].picture[0] + offset;
+                        diff= pix_abs16x16(p_pic, b_pic, s->linesize);
+                        if(diff>s->qscale*70){ //FIXME check that 70 is optimal
+                            s->mb_skiped=0;
+                            break;
+                        }
+                    }
+                }else
+                    s->mb_skiped=1; 
+
+                if(s->mb_skiped==1){
+                    /* skip macroblock */
+                    put_bits(&s->pb, 1, 1);
+
+                    if(interleaved_stats){
+                        s->misc_bits++;
+                        s->last_bits++;
+                    }
+                    s->skip_count++;
+                    return;
+                }
+            }
+
+            put_bits(&s->pb, 1, 0);    /* mb coded */
+            if(s->mv_type==MV_TYPE_16X16){
+                cbpc = cbp & 3;
+                if(s->dquant) cbpc+= 8;
+                put_bits(&s->pb,
+                        inter_MCBPC_bits[cbpc],
+                        inter_MCBPC_code[cbpc]);
+
+                cbpy = cbp >> 2;
+                cbpy ^= 0xf;
+                put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+                if(s->dquant)
+                    put_bits(pb2, 2, dquant_code[s->dquant+2]);
+
+                if(!s->progressive_sequence){
+                    if(cbp)
+                        put_bits(pb2, 1, s->interlaced_dct);
+                    put_bits(pb2, 1, 0); // no interlaced ME yet
+                }
+                    
+                if(interleaved_stats){
+                    bits= get_bit_count(&s->pb);
+                    s->misc_bits+= bits - s->last_bits;
+                    s->last_bits=bits;
+                }
+
+                /* motion vectors: 16x16 mode */
+                h263_pred_motion(s, 0, &pred_x, &pred_y);
+            
+                h263_encode_motion(s, motion_x - pred_x, s->f_code);
+                h263_encode_motion(s, motion_y - pred_y, s->f_code);
+            }else{
+                cbpc = (cbp & 3)+16;
+                put_bits(&s->pb,
+                        inter_MCBPC_bits[cbpc],
+                        inter_MCBPC_code[cbpc]);
+                cbpy = cbp >> 2;
+                cbpy ^= 0xf;
+                put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+
+                if(interleaved_stats){
+                    bits= get_bit_count(&s->pb);
+                    s->misc_bits+= bits - s->last_bits;
+                    s->last_bits=bits;
+                }
+
+                for(i=0; i<4; i++){
+                    /* motion vectors: 8x8 mode*/
+                    h263_pred_motion(s, i, &pred_x, &pred_y);
+
+                    h263_encode_motion(s, s->motion_val[ s->block_index[i] ][0] - pred_x, s->f_code);
+                    h263_encode_motion(s, s->motion_val[ s->block_index[i] ][1] - pred_y, s->f_code);
+                }
+            }
+
+            if(interleaved_stats){ 
+                bits= get_bit_count(&s->pb);
+                s->mv_bits+= bits - s->last_bits;
+                s->last_bits=bits;
+            }
+
+            /* encode each block */
+            for (i = 0; i < 6; i++) {
+                mpeg4_encode_block(s, block[i], i, 0, zigzag_direct, NULL, tex_pb);
+            }
+
+            if(interleaved_stats){
+                bits= get_bit_count(&s->pb);
+                s->p_tex_bits+= bits - s->last_bits;
+                s->last_bits=bits;
+            }
+            s->f_count++;
         }
     } else {
+        int cbp;
         int dc_diff[6];   //dc values with the dc prediction subtracted 
         int dir[6];  //prediction direction
         int zigzag_last_index[6];
@@ -284,7 +658,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
             const int level= block[i][0];
             UINT16 *dc_ptr;
 
-            dc_diff[i]= level - mpeg4_pred_dc(s, i, &dc_ptr, &dir[i]);
+            dc_diff[i]= level - ff_mpeg4_pred_dc(s, i, &dc_ptr, &dir[i]);
             if (i < 4) {
                 *dc_ptr = level * s->y_dc_scale;
             } else {
@@ -323,24 +697,45 @@ void mpeg4_encode_mb(MpegEncContext * s,
 
         cbpc = cbp & 3;
         if (s->pict_type == I_TYPE) {
+            if(s->dquant) cbpc+=4;
             put_bits(&s->pb,
                 intra_MCBPC_bits[cbpc],
                 intra_MCBPC_code[cbpc]);
         } else {
+            if(s->dquant) cbpc+=8;
             put_bits(&s->pb, 1, 0);    /* mb coded */
             put_bits(&s->pb,
                 inter_MCBPC_bits[cbpc + 4],
                 inter_MCBPC_code[cbpc + 4]);
         }
-        put_bits(&s->pb, 1, s->ac_pred);
+        put_bits(pb2, 1, s->ac_pred);
         cbpy = cbp >> 2;
-        put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+        put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+        if(s->dquant)
+            put_bits(dc_pb, 2, dquant_code[s->dquant+2]);
+
+        if(!s->progressive_sequence){
+            put_bits(dc_pb, 1, s->interlaced_dct);
+        }
+
+        if(interleaved_stats){
+            bits= get_bit_count(&s->pb);
+            s->misc_bits+= bits - s->last_bits;
+            s->last_bits=bits;
+        }
 
         /* encode each block */
         for (i = 0; i < 6; i++) {
-            mpeg4_encode_block(s, block[i], i, dc_diff[i], scan_table[i]);
+            mpeg4_encode_block(s, block[i], i, dc_diff[i], scan_table[i], dc_pb, tex_pb);
         }
 
+        if(interleaved_stats){
+            bits= get_bit_count(&s->pb);
+            s->i_tex_bits+= bits - s->last_bits;
+            s->last_bits=bits;
+        }
+        s->i_count++;
+
         /* restore ac coeffs & last_index stuff if we messed them up with the prediction */
         if(s->ac_pred){
             for(i=0; i<6; i++){
@@ -367,82 +762,177 @@ void h263_encode_mb(MpegEncContext * s,
                    int motion_x, int motion_y)
 {
     int cbpc, cbpy, i, cbp, pred_x, pred_y;
-   
-    //    printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
-   if (!s->mb_intra) {
-          /* compute cbp */
-          cbp = 0;
-          for (i = 0; i < 6; i++) {
-             if (s->block_last_index[i] >= 0)
-                  cbp |= 1 << (5 - i);
-          }
-          if ((cbp | motion_x | motion_y) == 0) {
-             /* skip macroblock */
-             put_bits(&s->pb, 1, 1);
-             return;
-          }
-          put_bits(&s->pb, 1, 0);      /* mb coded */
-          cbpc = cbp & 3;
-          put_bits(&s->pb,
-               inter_MCBPC_bits[cbpc],
-               inter_MCBPC_code[cbpc]);
-          cbpy = cbp >> 2;
-          cbpy ^= 0xf;
-          put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
-
-          /* motion vectors: 16x16 mode only now */
-      h263_pred_motion(s, 0, &pred_x, &pred_y);
+    INT16 pred_dc;
+    INT16 rec_intradc[6];
+    UINT16 *dc_ptr[6];
+    const int dquant_code[5]= {1,0,9,2,3};
+           
+    //printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
+    if (!s->mb_intra) {
+        /* compute cbp */
+        cbp = 0;
+        for (i = 0; i < 6; i++) {
+            if (s->block_last_index[i] >= 0)
+                cbp |= 1 << (5 - i);
+        }
+        if ((cbp | motion_x | motion_y | s->dquant) == 0) {
+            /* skip macroblock */
+            put_bits(&s->pb, 1, 1);
+            return;
+        }
+        put_bits(&s->pb, 1, 0);        /* mb coded */
+        cbpc = cbp & 3;
+        if(s->dquant) cbpc+= 8;
+        put_bits(&s->pb,
+                   inter_MCBPC_bits[cbpc],
+                   inter_MCBPC_code[cbpc]);
+        cbpy = cbp >> 2;
+        cbpy ^= 0xf;
+        put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+        if(s->dquant)
+            put_bits(&s->pb, 2, dquant_code[s->dquant+2]);
+
+        /* motion vectors: 16x16 mode only now */
+        h263_pred_motion(s, 0, &pred_x, &pred_y);
       
-      if (!s->umvplus) {  
-         h263_encode_motion(s, motion_x - pred_x);
-         h263_encode_motion(s, motion_y - pred_y);
-      }
-      else {
-         h263p_encode_umotion(s, motion_x - pred_x);
-         h263p_encode_umotion(s, motion_y - pred_y);
-         if (((motion_x - pred_x) == 1) && ((motion_y - pred_y) == 1))
-            /* To prevent Start Code emulation */
-            put_bits(&s->pb,1,1);
-      }
-   } else {
-       /* compute cbp */
-       cbp = 0;
-       for (i = 0; i < 6; i++) {
-           if (s->block_last_index[i] >= 1)
-               cbp |= 1 << (5 - i);
-       }
+        if (!s->umvplus) {  
+            h263_encode_motion(s, motion_x - pred_x, s->f_code);
+            h263_encode_motion(s, motion_y - pred_y, s->f_code);
+        }
+        else {
+            h263p_encode_umotion(s, motion_x - pred_x);
+            h263p_encode_umotion(s, motion_y - pred_y);
+            if (((motion_x - pred_x) == 1) && ((motion_y - pred_y) == 1))
+                /* To prevent Start Code emulation */
+                put_bits(&s->pb,1,1);
+        }
+    } else {
+        int li = s->h263_aic ? 0 : 1;
+        
+        cbp = 0;
+        for(i=0; i<6; i++) {
+            /* Predict DC */
+            if (s->h263_aic && s->mb_intra) {
+                INT16 level = block[i][0];
+            
+                pred_dc = h263_pred_dc(s, i, &dc_ptr[i]);
+                level -= pred_dc;
+                /* Quant */
+                if (level < 0)
+                    level = (level + (s->qscale >> 1))/(s->y_dc_scale);
+                else
+                    level = (level - (s->qscale >> 1))/(s->y_dc_scale);
+                    
+                /* AIC can change CBP */
+                if (level == 0 && s->block_last_index[i] == 0)
+                    s->block_last_index[i] = -1;
+                else if (level < -127)
+                    level = -127;
+                else if (level > 127)
+                    level = 127;
+                
+                block[i][0] = level;
+                /* Reconstruction */ 
+                rec_intradc[i] = (s->y_dc_scale*level) + pred_dc;
+                /* Oddify */
+                rec_intradc[i] |= 1;
+                //if ((rec_intradc[i] % 2) == 0)
+                //    rec_intradc[i]++;
+                /* Clipping */
+                if (rec_intradc[i] < 0)
+                    rec_intradc[i] = 0;
+                else if (rec_intradc[i] > 2047)
+                    rec_intradc[i] = 2047;
+                                
+                /* Update AC/DC tables */
+                *dc_ptr[i] = rec_intradc[i];
+            }
+            /* compute cbp */
+            if (s->block_last_index[i] >= li)
+                cbp |= 1 << (5 - i);
+        }
 
-       cbpc = cbp & 3;
-       if (s->pict_type == I_TYPE) {
-           put_bits(&s->pb,
-                    intra_MCBPC_bits[cbpc],
-                    intra_MCBPC_code[cbpc]);
-       } else {
-           put_bits(&s->pb, 1, 0);     /* mb coded */
-           put_bits(&s->pb,
-                    inter_MCBPC_bits[cbpc + 4],
-                    inter_MCBPC_code[cbpc + 4]);
-       }
-       if (s->h263_pred) {
-           /* XXX: currently, we do not try to use ac prediction */
-           put_bits(&s->pb, 1, 0);     /* no ac prediction */
-       }
-       cbpy = cbp >> 2;
-       put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+        cbpc = cbp & 3;
+        if (s->pict_type == I_TYPE) {
+            if(s->dquant) cbpc+=4;
+            put_bits(&s->pb,
+                intra_MCBPC_bits[cbpc],
+                intra_MCBPC_code[cbpc]);
+        } else {
+            if(s->dquant) cbpc+=8;
+            put_bits(&s->pb, 1, 0);    /* mb coded */
+            put_bits(&s->pb,
+                inter_MCBPC_bits[cbpc + 4],
+                inter_MCBPC_code[cbpc + 4]);
+        }
+        if (s->h263_aic) {
+            /* XXX: currently, we do not try to use ac prediction */
+            put_bits(&s->pb, 1, 0);    /* no AC prediction */
+        }
+        cbpy = cbp >> 2;
+        put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+        if(s->dquant)
+            put_bits(&s->pb, 2, dquant_code[s->dquant+2]);
     }
 
-    /* encode each block */
-    if (s->h263_pred) {
-       for (i = 0; i < 6; i++) {
-//         mpeg4_encode_block(s, block[i], i);
-       }
+    for(i=0; i<6; i++) {
+        /* encode each block */
+        h263_encode_block(s, block[i], i);
+    
+        /* Update INTRADC for decoding */
+        if (s->h263_aic && s->mb_intra) {
+            block[i][0] = rec_intradc[i];
+            
+        }
+    }
+}
+
+static int h263_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr) // returns the DC predictor for block n (0-3 use dc_val[0], 4-5 use dc_val[n-3]); *dc_val_ptr receives the address of this block's own DC slot
+{
+    int x, y, wrap, a, c, pred_dc, scale; // NOTE(review): scale is assigned below but never read in this function
+    INT16 *dc_val, *ac_val; // NOTE(review): ac_val is assigned but never read; dc_val is INT16* while dc_val_ptr is UINT16**
+
+    /* find prediction */
+    if (n < 4) {
+        x = 2 * s->mb_x + 1 + (n & 1); // block column, offset by 1 so that x-1 is always a valid index
+        y = 2 * s->mb_y + 1 + ((n & 2) >> 1); // block row, offset by 1 so that y-1 is always a valid index
+        wrap = s->mb_width * 2 + 2; // row stride of the plane: two blocks per MB plus 2 extra columns
+        dc_val = s->dc_val[0];
+        ac_val = s->ac_val[0][0];
+        scale = s->y_dc_scale;
     } else {
-       for (i = 0; i < 6; i++) {
-           h263_encode_block(s, block[i], i);
-       }
+        x = s->mb_x + 1; // one block per MB in these planes
+        y = s->mb_y + 1;
+        wrap = s->mb_width + 2;
+        dc_val = s->dc_val[n - 4 + 1]; // n=4 -> dc_val[1], n=5 -> dc_val[2]
+        ac_val = s->ac_val[n - 4 + 1][0];
+        scale = s->c_dc_scale;
     }
+    /* B C
+     * A X 
+     */
+    a = dc_val[(x - 1) + (y) * wrap]; // left neighbour (A)
+    c = dc_val[(x) + (y - 1) * wrap]; // top neighbour (C)
+    
+    /* No prediction outside GOB boundary */
+    if (s->first_slice_line && ((n < 2) || (n > 3))) // blocks 0,1,4,5: their top neighbour lies in the MB row above
+        c = 1024; // 1024 is the value treated as "no neighbour" by the tests below
+    pred_dc = 1024; // always overwritten by one of the three branches below
+    /* just DC prediction */
+    if (a != 1024 && c != 1024)
+        pred_dc = (a + c) >> 1; // both usable: average of left and top
+    else if (a != 1024)
+        pred_dc = a; // only left usable
+    else
+        pred_dc = c; // top, or 1024 when neither is usable
+    
+    /* we assume pred is positive */
+    //pred_dc = (pred_dc + (scale >> 1)) / scale;
+    *dc_val_ptr = &dc_val[x + y * wrap]; // caller stores the reconstructed DC through this pointer
+    return pred_dc;
 }
 
+
 void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n)
 {
     int x, y, wrap, a, c, pred_dc, scale, i;
@@ -474,6 +964,9 @@ void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n)
     a = dc_val[(x - 1) + (y) * wrap];
     c = dc_val[(x) + (y - 1) * wrap];
     
+    /* No prediction outside GOB boundary */
+    if (s->first_slice_line && ((n < 2) || (n > 3)))
+        c = 1024;
     pred_dc = 1024;
     if (s->ac_pred) {
         if (s->h263_aic_dir) {
@@ -524,24 +1017,6 @@ void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n)
         ac_val1[8 + i] = block[block_permute_op(i)];
 }
 
-
-static inline int mid_pred(int a, int b, int c)
-{
-    int vmin, vmax;
-    vmax = vmin = a;
-    if (b < vmin)
-        vmin = b;
-    else
-       vmax = b;
-
-    if (c < vmin)
-        vmin = c;
-    else if (c > vmax)
-        vmax = c;
-
-    return a + b + c - vmin - vmax;
-}
-
 INT16 *h263_pred_motion(MpegEncContext * s, int block, 
                         int *px, int *py)
 {
@@ -554,13 +1029,46 @@ INT16 *h263_pred_motion(MpegEncContext * s, int block,
 
     mot_val = s->motion_val[xy];
 
-    /* special case for first line */
-    if ((s->mb_y == 0 || s->first_slice_line || s->first_gob_line) && block<2) {
-        A = s->motion_val[xy - 1];
-        *px = A[0];
-        *py = A[1];
+    A = s->motion_val[xy - 1];
+    /* special case for first (slice) line */
+    if ((s->mb_y == 0 || s->first_slice_line) && block<3) {
+        // we can't just change some MVs to simulate that, as we need them for the B frames (and ME)
+        // and if we ever support non-rectangular objects then we need to do a few ifs here anyway :(
+        if(block==0){ //most common case
+            if(s->mb_x  == s->resync_mb_x){ //rare
+                *px= *py = 0;
+            }else if(s->mb_x + 1 == s->resync_mb_x){ //rare
+                C = s->motion_val[xy + off[block] - wrap];
+                if(s->mb_x==0){
+                    *px = C[0];
+                    *py = C[1];
+                }else{
+                    *px = mid_pred(A[0], 0, C[0]);
+                    *py = mid_pred(A[1], 0, C[1]);
+                }
+            }else{
+                *px = A[0];
+                *py = A[1];
+            }
+        }else if(block==1){
+            if(s->mb_x + 1 == s->resync_mb_x){ //rare
+                C = s->motion_val[xy + off[block] - wrap];
+                *px = mid_pred(A[0], 0, C[0]);
+                *py = mid_pred(A[1], 0, C[1]);
+            }else{
+                *px = A[0];
+                *py = A[1];
+            }
+        }else{ /* block==2*/
+            B = s->motion_val[xy - wrap];
+            C = s->motion_val[xy + off[block] - wrap];
+            if(s->mb_x == s->resync_mb_x) //rare
+                A[0]=A[1]=0;
+    
+            *px = mid_pred(A[0], B[0], C[0]);
+            *py = mid_pred(A[1], B[1], C[1]);
+        }
     } else {
-        A = s->motion_val[xy - 1];
         B = s->motion_val[xy - wrap];
         C = s->motion_val[xy + off[block] - wrap];
         *px = mid_pred(A[0], B[0], C[0]);
@@ -569,7 +1077,7 @@ INT16 *h263_pred_motion(MpegEncContext * s, int block,
     return mot_val;
 }
 
-static void h263_encode_motion(MpegEncContext * s, int val)
+static void h263_encode_motion(MpegEncContext * s, int val, int f_code)
 {
     int range, l, m, bit_size, sign, code, bits;
 
@@ -578,7 +1086,7 @@ static void h263_encode_motion(MpegEncContext * s, int val)
         code = 0;
         put_bits(&s->pb, mvtab[code][1], mvtab[code][0]);
     } else {
-        bit_size = s->f_code - 1;
+        bit_size = f_code - 1;
         range = 1 << bit_size;
         /* modulo encoding */
         l = range * 32;
@@ -588,19 +1096,17 @@ static void h263_encode_motion(MpegEncContext * s, int val)
         } else if (val >= l) {
             val -= m;
         }
+        assert(val>=-l && val<l);
 
         if (val >= 0) {
-            val--;
-            code = (val >> bit_size) + 1;
-            bits = val & (range - 1);
             sign = 0;
         } else {
             val = -val;
-            val--;
-            code = (val >> bit_size) + 1;
-            bits = val & (range - 1);
             sign = 1;
         }
+        val--;
+        code = (val >> bit_size) + 1;
+        bits = val & (range - 1);
 
         put_bits(&s->pb, mvtab[code][1] + 1, (mvtab[code][0] << 1) | sign); 
         if (bit_size > 0) {
@@ -648,60 +1154,279 @@ static void h263p_encode_umotion(MpegEncContext * s, int val)
     }
 }
 
-void h263_encode_init_vlc(MpegEncContext *s)
+static void init_mv_penalty_and_fcode(MpegEncContext *s) /* s is not read: the three tables filled here do not depend on the context */
 {
-    static int done = 0;
+    int f_code; /* fills mv_penalty[f_code][mv] (bit cost of coding mv with that f_code), fcode_tab and umv_fcode_tab */
+    int mv;
+    for(f_code=1; f_code<=MAX_FCODE; f_code++){
+        for(mv=-MAX_MV; mv<=MAX_MV; mv++){
+            int len;
 
-    if (!done) {
-        done = 1;
-        init_rl(&rl_inter);
-        init_rl(&rl_intra);
+            if(mv==0) len= mvtab[0][1];
+            else{
+                int val, bit_size, code;
+
+                /* use the f_code of the row being filled; s->f_code would give every row the same bit_size */
+                bit_size = f_code - 1;
+
+                val=mv;
+                if (val < 0) 
+                    val = -val;
+                val--;
+                code = (val >> bit_size) + 1; /* same split into vlc code + bit_size extra bits as h263_encode_motion() */
+                if(code<33){
+                    len= mvtab[code][1] + 1 + bit_size; /* vlc + sign bit + extra bits */
+                }else{
+                    len= mvtab[32][1] + 2 + bit_size; /* not codable with this f_code: longest code plus one more bit */
+                }
+            }
+
+            mv_penalty[f_code][mv+MAX_MV]= len;
+        }
+    }
+
+    /* descending order: smaller f_codes overwrite, so each mv ends up with the smallest f_code whose range holds it */
+    for(f_code=MAX_FCODE; f_code>0; f_code--){
+        for(mv=-(16<<f_code); mv<(16<<f_code); mv++){ /* NOTE(review): assumes 16<<MAX_FCODE <= MAX_MV, confirm against the table sizes */
+            fcode_tab[mv+MAX_MV]= f_code;
+        }
+    }
+
+    for(mv=0; mv<MAX_MV*2+1; mv++){ /* umv table: f_code 1 for every mv */
+        umv_fcode_tab[mv]= 1;
     }
 }
 
-static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
+static void init_uni_dc_tab(void) /* builds uni_DCtab_lum / uni_DCtab_chrom: the complete DC code (bits, length) for each level */
 {
-    int level, run, last, i, j, last_index, last_non_zero, sign, slevel;
-    int code;
-    RLTable *rl = &rl_inter;
-
-    if (s->mb_intra) {
-        /* DC coef */
-           level = block[0];
-        /* 255 cannot be represented, so we clamp */
-        if (level > 254) {
-            level = 254;
-            block[0] = 254;
+    int level, uni_code, uni_len;
+    /* one entry per level in -256..255, stored at index level+256 */
+    for(level=-256; level<256; level++){
+        int size, v, l;
+        /* find number of bits needed for |level| */
+        size = 0;
+        v = abs(level);
+        while (v) {
+            v >>= 1;
+           size++;
         }
-        /* 0 cannot be represented also */
-        else if (!level) {
-            level = 1;
-            block[0] = 1;
+        /* value bits: level itself, or for negative levels |level| with its low size bits inverted */
+        if (level < 0)
+            l= (-level) ^ ((1 << size) - 1);
+        else
+            l= level;
+
+        /* luminance: size code from DCtab_lum, followed by the size value bits */
+        uni_code= DCtab_lum[size][0];
+        uni_len = DCtab_lum[size][1];
+
+        if (size > 0) {
+            uni_code<<=size; uni_code|=l;
+            uni_len+=size;
+            if (size > 8){ /* sizes above 8 get one extra trailing 1 bit */
+                uni_code<<=1; uni_code|=1;
+                uni_len++;
+            }
         }
-           if (level == 128)
+        uni_DCtab_lum[level+256][0]= uni_code;
+        uni_DCtab_lum[level+256][1]= uni_len;
+
+        /* chrominance: same construction with the size code taken from DCtab_chrom */
+        uni_code= DCtab_chrom[size][0];
+        uni_len = DCtab_chrom[size][1];
+        
+        if (size > 0) {
+            uni_code<<=size; uni_code|=l;
+            uni_len+=size;
+            if (size > 8){ /* sizes above 8 get one extra trailing 1 bit */
+                uni_code<<=1; uni_code|=1;
+                uni_len++;
+            }
+        }
+        uni_DCtab_chrom[level+256][0]= uni_code;
+        uni_DCtab_chrom[level+256][1]= uni_len;
+
+    }
+}
+
+static void init_uni_mpeg4_rl_tab(RLTable *rl, UINT32 *bits_tab, UINT8 *len_tab){
+    int slevel, run, last;
+    /* For every (last, run, signed level) triple store the shortest of the four codings tried below in bits_tab/len_tab. */
+    assert(MAX_LEVEL >= 64); /* presumably guards the rl->max_run[last][level] lookup below (level up to 64) - confirm */
+    assert(MAX_RUN   >= 63); /* presumably guards the rl->max_level[last][run] lookup below (run up to 63) - confirm */
+
+    for(slevel=-64; slevel<64; slevel++){
+        if(slevel==0) continue;
+        for(run=0; run<64; run++){
+            for(last=0; last<=1; last++){
+                const int index= UNI_MPEG4_ENC_INDEX(last, run, slevel+64); /* level is biased by 64 in the index */
+                int level= slevel < 0 ? -slevel : slevel;
+                int sign= slevel < 0 ? 1 : 0;
+                int bits, len, code;
+                int level1, run1;
+                
+                len_tab[index]= 100; /* start value: a candidate is only stored if it is shorter than the current entry */
+                     
+                /* ESC0: plain vlc + sign bit, no escape */
+                code= get_rl_index(rl, last, run, level);
+                bits= rl->table_vlc[code][0];
+                len=  rl->table_vlc[code][1];
+                bits=bits*2+sign; len++;
+                
+                if(code!=rl->n && len < len_tab[index]){ /* index rl->n is the escape entry, i.e. no direct code was found */
+                    bits_tab[index]= bits;
+                    len_tab [index]= len;
+                }
+#if 1
+                /* ESC1: escape code + 0, then the vlc of (level - max_level[last][run]) + sign bit */
+                bits= rl->table_vlc[rl->n][0];
+                len=  rl->table_vlc[rl->n][1];
+                bits=bits*2;    len++; //esc1
+                level1= level - rl->max_level[last][run];
+                if(level1>0){
+                    code= get_rl_index(rl, last, run, level1);
+                    bits<<= rl->table_vlc[code][1];
+                    len  += rl->table_vlc[code][1];
+                    bits += rl->table_vlc[code][0];
+                    bits=bits*2+sign; len++;
+                
+                    if(code!=rl->n && len < len_tab[index]){
+                        bits_tab[index]= bits;
+                        len_tab [index]= len;
+                    }
+                }
+#endif 
+#if 1
+                /* ESC2: escape code + 10, then the vlc of (run - max_run[last][level] - 1) + sign bit */
+                bits= rl->table_vlc[rl->n][0];
+                len=  rl->table_vlc[rl->n][1];
+                bits=bits*4+2;    len+=2; //esc2
+                run1 = run - rl->max_run[last][level] - 1;
+                if(run1>=0){
+                    code= get_rl_index(rl, last, run1, level);
+                    bits<<= rl->table_vlc[code][1];
+                    len  += rl->table_vlc[code][1];
+                    bits += rl->table_vlc[code][0];
+                    bits=bits*2+sign; len++;
+                
+                    if(code!=rl->n && len < len_tab[index]){
+                        bits_tab[index]= bits;
+                        len_tab [index]= len;
+                    }
+                }
+#endif           
+                /* ESC3: escape code + 11, then last (1 bit), run (6), marker, level (12, two's complement), marker */
+                bits= rl->table_vlc[rl->n][0];
+                len = rl->table_vlc[rl->n][1];
+                bits=bits*4+3;    len+=2; //esc3
+                bits=bits*2+last; len++;
+                bits=bits*64+run; len+=6;
+                bits=bits*2+1;    len++;  //marker
+                bits=bits*4096+(slevel&0xfff); len+=12;
+                bits=bits*2+1;    len++;  //marker
+                
+                if(len < len_tab[index]){ /* no code check here: this form is built without a table lookup for run/level */
+                    bits_tab[index]= bits;
+                    len_tab [index]= len;
+                }
+            }
+        }
+    }
+}
+
+/* Prepares an encoder context: the shared tables are built once, the per-context fields on every call. */
+void h263_encode_init(MpegEncContext *s)
+{
+    static int done = 0;
+
+    /* one-time construction of the static dc / run-level / motion vector tables */
+    if (done == 0) {
+        done = 1;
+        init_uni_dc_tab();
+        init_rl(&rl_inter);
+        init_rl(&rl_intra);
+        init_rl(&rl_intra_aic);
+
+        init_uni_mpeg4_rl_tab(&rl_intra, uni_mpeg4_intra_rl_bits, uni_mpeg4_intra_rl_len);
+        init_uni_mpeg4_rl_tab(&rl_inter, uni_mpeg4_inter_rl_bits, uni_mpeg4_inter_rl_len);
+        init_mv_penalty_and_fcode(s);
+    }
+
+    //FIXME exact table for msmpeg4 & h263p
+    s->mv_penalty = mv_penalty;
+
+    /* coefficient range of -128..127 unless the codec overrides it below */
+    s->min_qcoeff = -128;
+    s->max_qcoeff =  127;
+
+    // use fcodes >1 only for mpeg4 & h263 & h263p FIXME
+    // Note for mpeg4 & h263 the dc-scale table will be set per frame as needed later
+    if (s->codec_id == CODEC_ID_MPEG4) {
+        s->fcode_tab  = fcode_tab;
+        s->min_qcoeff = -2048;
+        s->max_qcoeff =  2047;
+    } else if (s->codec_id == CODEC_ID_H263P) {
+        s->fcode_tab = umv_fcode_tab;
+    } else {
+        /* every other codec: fcode_tab is left untouched and the mpeg1 dc scale table is selected */
+        s->y_dc_scale_table = ff_mpeg1_dc_scale_table;
+        s->c_dc_scale_table = ff_mpeg1_dc_scale_table;
+    }
+
+    /* quantizer bias, chosen by the quantization type */
+    if (s->mpeg_quant) {
+        s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);    /* (a + x*3/8)/x */
+        s->inter_quant_bias = 0;
+    } else {
+        s->intra_quant_bias = 0;
+        s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2)); /* (a - x/4)/x */
+    }
+}
+
+static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
+{
+    int level, run, last, i, j, last_index, last_non_zero, sign, slevel, code;
+    RLTable *rl;
+
+    rl = &rl_inter;
+    if (s->mb_intra && !s->h263_aic) {
+        /* DC coef */
+           level = block[0];
+        /* 255 cannot be represented, so we clamp */
+        if (level > 254) {
+            level = 254;
+            block[0] = 254;
+        }
+        /* 0 cannot be represented also */
+        else if (!level) {
+            level = 1;
+            block[0] = 1;
+        }
+           if (level == 128)
                put_bits(&s->pb, 8, 0xff);
            else
                put_bits(&s->pb, 8, level & 0xff);
            i = 1;
     } else {
            i = 0;
+           if (s->h263_aic && s->mb_intra)
+               rl = &rl_intra_aic;
     }
-
+   
     /* AC coefs */
     last_index = s->block_last_index[n];
     last_non_zero = i - 1;
     for (; i <= last_index; i++) {
-       j = zigzag_direct[i];
-       level = block[j];
-       if (level) {
-           run = i - last_non_zero - 1;
-           last = (i == last_index);
-           sign = 0;
-           slevel = level;
-           if (level < 0) {
-               sign = 1;
-               level = -level;
-           }
+        j = zigzag_direct[i];
+        level = block[j];
+        if (level) {
+            run = i - last_non_zero - 1;
+            last = (i == last_index);
+            sign = 0;
+            slevel = level;
+            if (level < 0) {
+                sign = 1;
+                level = -level;
+            }
             code = get_rl_index(rl, last, run, level);
             put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
             if (code == rl->n) {
@@ -711,51 +1436,89 @@ static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
             } else {
                 put_bits(&s->pb, 1, sign);
             }
-           last_non_zero = i;
-       }
+               last_non_zero = i;
+           }
     }
 }
 
 /***************************************************/
 
-static void mpeg4_stuffing(PutBitContext * pbc)
+void ff_mpeg4_stuffing(PutBitContext * pbc) /* writes a 0 bit followed by as many 1 bits as are needed to reach the next byte boundary */
 {
     int length;
     put_bits(pbc, 1, 0);
     length= (-get_bit_count(pbc))&7;
-    put_bits(pbc, length, (1<<length)-1);
+    if(length) put_bits(pbc, length, (1<<length)-1); /* length==0: already byte aligned after the 0 bit, so nothing more is written */
 }
 
-static void put_string(PutBitContext * pbc, char *s)
-{
-    while(*s){
-        put_bits(pbc, 8, *s);
-        s++;
+/* Updates the time fields of s for the picture about to be coded; must be called before writing the header */
+void ff_set_mpeg4_time(MpegEncContext * s, int picture_number){
+    int time_div, time_mod;
+
+    if(s->pict_type==I_TYPE){ //we will encode a vol header
+        s->time_increment_resolution= s->frame_rate/ff_gcd(s->frame_rate, FRAME_RATE_BASE);
+        if(s->time_increment_resolution>=256*256) s->time_increment_resolution= 256*128;
+        /* the vol header writes the resolution with 16 bits, hence the limit above */
+        s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1;
+    }
+    /* s->time is counted in 1/time_increment_resolution units; NOTE(review): the divide by 1000*1000 suggests pts is in microseconds, and a pts of 0 falls back to the frame counter - confirm */
+    if(s->avctx->pts)
+        s->time= (s->avctx->pts*s->time_increment_resolution + 500*1000)/(1000*1000);
+    else
+        s->time= picture_number*(INT64)FRAME_RATE_BASE*s->time_increment_resolution/s->frame_rate;
+    time_div= s->time/s->time_increment_resolution;
+    time_mod= s->time%s->time_increment_resolution; /* NOTE(review): time_mod is not read again in this function */
+    /* B frames only derive pb_time; every other picture type advances time_base, pp_time and last_non_b_time */
+    if(s->pict_type==B_TYPE){
+        s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
+    }else{
+        s->last_time_base= s->time_base;
+        s->time_base= time_div;
+        s->pp_time= s->time - s->last_non_b_time;
+        s->last_non_b_time= s->time;
     }
-    put_bits(pbc, 8, 0);
 }
 
 static void mpeg4_encode_vol_header(MpegEncContext * s)
 {
     int vo_ver_id=1; //must be 2 if we want GMC or q-pel
+    char buf[255];
+
+    s->vo_type= s->has_b_frames ? CORE_VO_TYPE : SIMPLE_VO_TYPE;
 
-    if(get_bit_count(&s->pb)!=0) mpeg4_stuffing(&s->pb);
     put_bits(&s->pb, 16, 0);
     put_bits(&s->pb, 16, 0x100);        /* video obj */
     put_bits(&s->pb, 16, 0);
     put_bits(&s->pb, 16, 0x120);        /* video obj layer */
 
     put_bits(&s->pb, 1, 0);            /* random access vol */
-    put_bits(&s->pb, 8, 1);            /* video obj type indication= simple obj */
+    put_bits(&s->pb, 8, s->vo_type);   /* video obj type indication */
     put_bits(&s->pb, 1, 1);            /* is obj layer id= yes */
       put_bits(&s->pb, 4, vo_ver_id);  /* is obj layer ver id */
       put_bits(&s->pb, 3, 1);          /* is obj layer priority */
-    put_bits(&s->pb, 4, 1);            /* aspect ratio info= sqare pixel */ //FIXME real aspect
-    put_bits(&s->pb, 1, 0);            /* vol control parameters= no */
+    if(s->aspect_ratio_info) 
+        put_bits(&s->pb, 4, s->aspect_ratio_info);/* aspect ratio info */
+    else
+        put_bits(&s->pb, 4, 1);                /* aspect ratio info= sqare pixel */
+    if (s->aspect_ratio_info == FF_ASPECT_EXTENDED)
+    {
+       put_bits(&s->pb, 8, s->aspected_width);
+       put_bits(&s->pb, 8, s->aspected_height);
+    }
+
+    if(s->low_delay){
+        put_bits(&s->pb, 1, 1);                /* vol control parameters= yes */
+        put_bits(&s->pb, 2, 1);                /* chroma format YUV 420/YV12 */
+        put_bits(&s->pb, 1, s->low_delay);
+        put_bits(&s->pb, 1, 0);                /* vbv parameters= no */
+    }else{
+        put_bits(&s->pb, 1, 0);                /* vol control parameters= no */
+    }
+
     put_bits(&s->pb, 2, RECT_SHAPE);   /* vol shape= rectangle */
     put_bits(&s->pb, 1, 1);            /* marker bit */
-    put_bits(&s->pb, 16, s->time_increment_resolution=30000);
-    s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1;
+    
+    put_bits(&s->pb, 16, s->time_increment_resolution);
     if (s->time_increment_bits < 1)
         s->time_increment_bits = 1;
     put_bits(&s->pb, 1, 1);            /* marker bit */
@@ -765,7 +1528,7 @@ static void mpeg4_encode_vol_header(MpegEncContext * s)
     put_bits(&s->pb, 1, 1);            /* marker bit */
     put_bits(&s->pb, 13, s->height);   /* vol height */
     put_bits(&s->pb, 1, 1);            /* marker bit */
-    put_bits(&s->pb, 1, 0);            /* interlace */
+    put_bits(&s->pb, 1, s->progressive_sequence ? 0 : 1);
     put_bits(&s->pb, 1, 1);            /* obmc disable */
     if (vo_ver_id == 1) {
         put_bits(&s->pb, 1, s->vol_sprite_usage=0);            /* sprite enable */
@@ -773,41 +1536,65 @@ static void mpeg4_encode_vol_header(MpegEncContext * s)
         put_bits(&s->pb, 2, s->vol_sprite_usage=0);            /* sprite enable */
     }
     put_bits(&s->pb, 1, 0);            /* not 8 bit */
-    put_bits(&s->pb, 1, 0);            /* quant type= h263 style*/
+    put_bits(&s->pb, 1, s->mpeg_quant);        /* quant type= (0=h263 style)*/
+    if(s->mpeg_quant) put_bits(&s->pb, 2, 0); /* no custom matrixes */
+
     if (vo_ver_id != 1)
         put_bits(&s->pb, 1, s->quarter_sample=0);
     put_bits(&s->pb, 1, 1);            /* complexity estimation disable */
-    put_bits(&s->pb, 1, 1);            /* resync marker disable */
-    put_bits(&s->pb, 1, 0);            /* data partitioned */
+    s->resync_marker= s->rtp_mode;
+    put_bits(&s->pb, 1, s->resync_marker ? 0 : 1);/* resync marker disable */
+    put_bits(&s->pb, 1, s->data_partitioning ? 1 : 0);
+    if(s->data_partitioning){
+        put_bits(&s->pb, 1, 0);                /* no rvlc */
+    }
+
     if (vo_ver_id != 1){
         put_bits(&s->pb, 1, 0);                /* newpred */
         put_bits(&s->pb, 1, 0);                /* reduced res vop */
     }
     put_bits(&s->pb, 1, 0);            /* scalability */
-
-    mpeg4_stuffing(&s->pb);
-    put_bits(&s->pb, 16, 0);
-    put_bits(&s->pb, 16, 0x1B2);       /* user_data */
-    put_string(&s->pb, "ffmpeg"); //FIXME append some version ...
-
-    s->no_rounding = 0;
+    
+    ff_mpeg4_stuffing(&s->pb);
+
+    /* user data */
+    if(!ff_bit_exact){
+        put_bits(&s->pb, 16, 0);
+        put_bits(&s->pb, 16, 0x1B2);   /* user_data */
+        sprintf(buf, "FFmpeg%sb%s", FFMPEG_VERSION, LIBAVCODEC_BUILD_STR);
+        put_string(&s->pb, buf);
+        ff_mpeg4_stuffing(&s->pb);
+    }
 }
 
 /* write mpeg4 VOP header */
 void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
 {
-    if(s->pict_type==I_TYPE) mpeg4_encode_vol_header(s);
-
-    if(get_bit_count(&s->pb)!=0) mpeg4_stuffing(&s->pb);
+    int time_incr;
+    int time_div, time_mod;
+    
+    if(s->pict_type==I_TYPE){
+        s->no_rounding=0;
+        if(picture_number==0 || !s->strict_std_compliance)
+            mpeg4_encode_vol_header(s);
+    }
+    
+//printf("num:%d rate:%d base:%d\n", s->picture_number, s->frame_rate, FRAME_RATE_BASE);
+    
     put_bits(&s->pb, 16, 0);           /* vop header */
     put_bits(&s->pb, 16, 0x1B6);       /* vop header */
     put_bits(&s->pb, 2, s->pict_type - 1);     /* pict type: I = 0 , P = 1 */
-    /* XXX: time base + 1 not always correct */
-    put_bits(&s->pb, 1, 1);
+
+    time_div= s->time/s->time_increment_resolution;
+    time_mod= s->time%s->time_increment_resolution;
+    time_incr= time_div - s->last_time_base;
+    while(time_incr--)
+        put_bits(&s->pb, 1, 1);
+        
     put_bits(&s->pb, 1, 0);
 
     put_bits(&s->pb, 1, 1);    /* marker */
-    put_bits(&s->pb, s->time_increment_bits, 1);       /* XXX: correct time increment */
+    put_bits(&s->pb, s->time_increment_bits, time_mod);        /* time increment */
     put_bits(&s->pb, 1, 1);    /* marker */
     put_bits(&s->pb, 1, 1);    /* vop coded */
     if (    s->pict_type == P_TYPE 
@@ -816,6 +1603,10 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
        put_bits(&s->pb, 1, s->no_rounding);    /* rounding type */
     }
     put_bits(&s->pb, 3, 0);    /* intra dc VLC threshold */
+    if(!s->progressive_sequence){
+         put_bits(&s->pb, 1, s->top_field_first);
+         put_bits(&s->pb, 1, s->alternate_scan);
+    }
     //FIXME sprite stuff
 
     put_bits(&s->pb, 5, s->qscale);
@@ -825,44 +1616,27 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
     if (s->pict_type == B_TYPE)
        put_bits(&s->pb, 3, s->b_code); /* fcode_back */
     //    printf("****frame %d\n", picture_number);
+
+     s->y_dc_scale_table= ff_mpeg4_y_dc_scale_table; //FIXME add short header support 
+     s->c_dc_scale_table= ff_mpeg4_c_dc_scale_table;
+     s->h_edge_pos= s->width;
+     s->v_edge_pos= s->height;
 }
 
-void h263_dc_scale(MpegEncContext * s)
+static void change_qscale(MpegEncContext * s, int dquant) /* adds dquant to s->qscale, clamps the result and refreshes both dc scales */
 {
-#if 1
-    const static UINT8 y_tab[32]={
-    //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
-        0, 8, 8, 8, 8,10,12,14,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,34,36,38,40,42,44,46
-    };
-    const static UINT8 c_tab[32]={
-    //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
-        0, 8, 8, 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,20,21,22,23,24,25
-    };
-    s->y_dc_scale = y_tab[s->qscale];
-    s->c_dc_scale = c_tab[s->qscale];
-#else
-    int quant;
-    quant = s->qscale;
-    /* luminance */
-    if (quant < 5)
-       s->y_dc_scale = 8;
-    else if (quant > 4 && quant < 9)
-       s->y_dc_scale = (2 * quant);
-    else if (quant > 8 && quant < 25)
-       s->y_dc_scale = (quant + 8);
-    else
-       s->y_dc_scale = (2 * quant - 16);
-    /* chrominance */
-    if (quant < 5)
-       s->c_dc_scale = 8;
-    else if (quant > 4 && quant < 25)
-       s->c_dc_scale = ((quant + 13) / 2);
-    else
-       s->c_dc_scale = (quant - 6);
-#endif
+    s->qscale += dquant;
+    /* keep qscale inside 1..31 */
+    if (s->qscale < 1)
+        s->qscale = 1;
+    else if (s->qscale > 31)
+        s->qscale = 31;
+    /* the dc scales come from the per-context tables, indexed by the new qscale */
+    s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
+    s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
 }
 
-static inline int mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *dir_ptr)
+inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *dir_ptr)
 {
     int a, b, c, wrap, pred, scale;
     UINT16 *dc_val;
@@ -920,25 +1694,47 @@ void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n,
     ac_val1 = ac_val;
     if (s->ac_pred) {
         if (dir == 0) {
+            const int xy= s->mb_x-1 + s->mb_y*s->mb_width;
             /* left prediction */
             ac_val -= 16;
-            for(i=1;i<8;i++) {
-                block[block_permute_op(i*8)] += ac_val[i];
+            
+            if(s->mb_x==0 || s->qscale == s->qscale_table[xy] || n==1 || n==3){
+                /* same qscale */
+                for(i=1;i<8;i++) {
+                    block[block_permute_op(i*8)] += ac_val[i];
+                }
+            }else{
+                /* different qscale, we must rescale */
+                for(i=1;i<8;i++) {
+                    block[block_permute_op(i*8)] += ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale);
+                }
             }
         } else {
+            const int xy= s->mb_x + s->mb_y*s->mb_width - s->mb_width;
             /* top prediction */
             ac_val -= 16 * s->block_wrap[n];
-            for(i=1;i<8;i++) {
-                block[block_permute_op(i)] += ac_val[i + 8];
+
+            if(s->mb_y==0 || s->qscale == s->qscale_table[xy] || n==2 || n==3){
+                /* same qscale */
+                for(i=1;i<8;i++) {
+                    block[block_permute_op(i)] += ac_val[i + 8];
+                }
+            }else{
+                /* different qscale, we must rescale */
+                for(i=1;i<8;i++) {
+                    block[block_permute_op(i)] += ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale);
+                }
             }
         }
     }
     /* left copy */
     for(i=1;i<8;i++)
         ac_val1[i] = block[block_permute_op(i * 8)];
+
     /* top copy */
     for(i=1;i<8;i++)
         ac_val1[8 + i] = block[block_permute_op(i)];
+
 }
 
 static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n,
@@ -951,23 +1747,51 @@ static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n,
     ac_val = s->ac_val[0][0] + s->block_index[n] * 16;
  
     if (dir == 0) {
+        const int xy= s->mb_x-1 + s->mb_y*s->mb_width;
         /* left prediction */
         ac_val -= 16;
-        for(i=1;i<8;i++) {
-            block[block_permute_op(i*8)] -= ac_val[i];
+        if(s->mb_x==0 || s->qscale == s->qscale_table[xy] || n==1 || n==3){
+            /* same qscale */
+            for(i=1;i<8;i++) {
+                block[block_permute_op(i*8)] -= ac_val[i];
+            }
+        }else{
+            /* different qscale, we must rescale */
+            for(i=1;i<8;i++) {
+                block[block_permute_op(i*8)] -= ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale);
+            }
         }
     } else {
+        const int xy= s->mb_x + s->mb_y*s->mb_width - s->mb_width;
         /* top prediction */
         ac_val -= 16 * s->block_wrap[n];
-        for(i=1;i<8;i++) {
-            block[block_permute_op(i)] -= ac_val[i + 8];
+        if(s->mb_y==0 || s->qscale == s->qscale_table[xy] || n==2 || n==3){
+            /* same qscale */
+            for(i=1;i<8;i++) {
+                block[block_permute_op(i)] -= ac_val[i + 8];
+            }
+        }else{
+            /* different qscale, we must rescale */
+            for(i=1;i<8;i++) {
+                block[block_permute_op(i)] -= ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale);
+            }
         }
     }
 }
 
-
-static inline void mpeg4_encode_dc(MpegEncContext * s, int level, int n)
+static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n)
 {
+#if 1
+//    if(level<-255 || level>255) printf("dc overflow\n");
+    level+=256;
+    if (n < 4) {
+       /* luminance */
+       put_bits(s, uni_DCtab_lum[level][1], uni_DCtab_lum[level][0]);
+    } else {
+       /* chrominance */
+       put_bits(s, uni_DCtab_chrom[level][1], uni_DCtab_chrom[level][0]);
+    }
+#else
     int size, v;
     /* find number of bits */
     size = 0;
@@ -993,41 +1817,78 @@ static inline void mpeg4_encode_dc(MpegEncContext * s, int level, int n)
        if (size > 8)
            put_bits(&s->pb, 1, 1);
     }
+#endif
 }
 
-static void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n, int intra_dc, UINT8 *scan_table)
+static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n, int intra_dc, 
+                               UINT8 *scan_table, PutBitContext *dc_pb, PutBitContext *ac_pb)
 {
-    int level, run, last, i, j, last_index, last_non_zero, sign, slevel;
+    int last, i, last_non_zero, sign;
     int code;
     const RLTable *rl;
+    UINT32 *bits_tab;
+    UINT8 *len_tab;
+    const int last_index = s->block_last_index[n];
 
-    if (s->mb_intra) {
+    if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away
        /* mpeg4 based DC predictor */
-       mpeg4_encode_dc(s, intra_dc, n);
+       mpeg4_encode_dc(dc_pb, intra_dc, n);
+        if(last_index<1) return;
        i = 1;
         rl = &rl_intra;
+        bits_tab= uni_mpeg4_intra_rl_bits;
+        len_tab = uni_mpeg4_intra_rl_len;
     } else {
+        if(last_index<0) return;
        i = 0;
         rl = &rl_inter;
+        bits_tab= uni_mpeg4_inter_rl_bits;
+        len_tab = uni_mpeg4_inter_rl_len;
     }
 
     /* AC coefs */
-    last_index = s->block_last_index[n];
     last_non_zero = i - 1;
-    for (; i <= last_index; i++) {
-       j = scan_table[i];
-       level = block[j];
+#if 1
+    for (; i < last_index; i++) {
+       int level = block[ scan_table[i] ];
        if (level) {
-           run = i - last_non_zero - 1;
+           int run = i - last_non_zero - 1;
+            level+=64;
+            if((level&(~127)) == 0){
+                const int index= UNI_MPEG4_ENC_INDEX(0, run, level);
+                put_bits(ac_pb, len_tab[index], bits_tab[index]);
+            }else{ //ESC3
+                put_bits(ac_pb, 7+2+1+6+1+12+1, (3<<23)+(3<<21)+(0<<20)+(run<<14)+(1<<13)+(((level-64)&0xfff)<<1)+1);
+            }
+           last_non_zero = i;
+       }
+    }
+    /*if(i<=last_index)*/{
+       int level = block[ scan_table[i] ];
+        int run = i - last_non_zero - 1;
+        level+=64;
+        if((level&(~127)) == 0){
+            const int index= UNI_MPEG4_ENC_INDEX(1, run, level);
+            put_bits(ac_pb, len_tab[index], bits_tab[index]);
+        }else{ //ESC3
+            put_bits(ac_pb, 7+2+1+6+1+12+1, (3<<23)+(3<<21)+(1<<20)+(run<<14)+(1<<13)+(((level-64)&0xfff)<<1)+1);
+        }
+    }
+#else
+    for (; i <= last_index; i++) {
+       const int slevel = block[ scan_table[i] ];
+       if (slevel) {
+            int level;
+           int run = i - last_non_zero - 1;
            last = (i == last_index);
            sign = 0;
-           slevel = level;
+           level = slevel;
            if (level < 0) {
                sign = 1;
                level = -level;
            }
             code = get_rl_index(rl, last, run, level);
-            put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
+            put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
             if (code == rl->n) {
                 int level1, run1;
                 level1 = level - rl->max_level[last][run];
@@ -1036,7 +1897,7 @@ static void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n, int i
                 code = get_rl_index(rl, last, run, level1);
                 if (code == rl->n) {
                 esc2:
-                    put_bits(&s->pb, 1, 1);
+                    put_bits(ac_pb, 1, 1);
                     if (level > MAX_LEVEL)
                         goto esc3;
                     run1 = run - rl->max_run[last][level] - 1;
@@ -1046,30 +1907,31 @@ static void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n, int i
                     if (code == rl->n) {
                     esc3:
                         /* third escape */
-                        put_bits(&s->pb, 1, 1);
-                        put_bits(&s->pb, 1, last);
-                        put_bits(&s->pb, 6, run);
-                        put_bits(&s->pb, 1, 1);
-                        put_bits(&s->pb, 12, slevel & 0xfff);
-                        put_bits(&s->pb, 1, 1);
+                        put_bits(ac_pb, 1, 1);
+                        put_bits(ac_pb, 1, last);
+                        put_bits(ac_pb, 6, run);
+                        put_bits(ac_pb, 1, 1);
+                        put_bits(ac_pb, 12, slevel & 0xfff);
+                        put_bits(ac_pb, 1, 1);
                     } else {
                         /* second escape */
-                        put_bits(&s->pb, 1, 0);
-                        put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
-                        put_bits(&s->pb, 1, sign);
+                        put_bits(ac_pb, 1, 0);
+                        put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
+                        put_bits(ac_pb, 1, sign);
                     }
                 } else {
                     /* first escape */
-                    put_bits(&s->pb, 1, 0);
-                    put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
-                    put_bits(&s->pb, 1, sign);
+                    put_bits(ac_pb, 1, 0);
+                    put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
+                    put_bits(ac_pb, 1, sign);
                 }
             } else {
-                put_bits(&s->pb, 1, sign);
+                put_bits(ac_pb, 1, sign);
             }
            last_non_zero = i;
        }
     }
+#endif
 }
 
 
@@ -1114,20 +1976,60 @@ void init_rl(RLTable *rl)
             if (run > max_run[level])
                 max_run[level] = run;
         }
-        rl->max_level[last] = malloc(MAX_RUN + 1);
+        rl->max_level[last] = av_malloc(MAX_RUN + 1);
         memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
-        rl->max_run[last] = malloc(MAX_LEVEL + 1);
+        rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
         memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
-        rl->index_run[last] = malloc(MAX_RUN + 1);
+        rl->index_run[last] = av_malloc(MAX_RUN + 1);
         memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
     }
 }
 
 void init_vlc_rl(RLTable *rl)
 {
+    int i, q; /* i walks rl->vlc.table, q selects one of the 32 rl_vlc tables */
+    
     init_vlc(&rl->vlc, 9, rl->n + 1, 
              &rl->table_vlc[0][1], 4, 2,
              &rl->table_vlc[0][0], 4, 2);
+    /* Derive one lookup table per q (0..31) from rl->vlc.table. Every entry
+     * keeps the code length and gets a run and a level that already has
+     * qmul/qadd applied (q presumably is the quantiser -- TODO confirm). */
+    
+    for(q=0; q<32; q++){
+        int qmul= q*2;
+        int qadd= (q-1)|1;
+        
+        if(q==0){ /* table 0 stores the level from table_level unchanged */
+            qmul=1;
+            qadd=0;
+        }
+        
+        rl->rl_vlc[q]= av_malloc(rl->vlc.table_size*sizeof(RL_VLC_ELEM)); /* NOTE(review): result is not checked before it is written below */
+        for(i=0; i<rl->vlc.table_size; i++){
+            int code= rl->vlc.table[i][0];
+            int len = rl->vlc.table[i][1];
+            int level, run;
+        
+            if(len==0){ // illegal code, stored as run 66 / level MAX_LEVEL
+                run= 66;
+                level= MAX_LEVEL;
+            }else if(len<0){ //more bits needed, level carries the table's code value
+                run= 0;
+                level= code;
+            }else{
+                if(code==rl->n){ //esc, stored as run 66 / level 0
+                    run= 66;
+                    level= 0;
+                }else{
+                    run=   rl->table_run  [code] + 1;
+                    level= rl->table_level[code] * qmul + qadd;
+                    if(code >= rl->last) run+=192; /* codes at or above rl->last are tagged by adding 192 to run */
+                }
+            }
+            rl->rl_vlc[q][i].len= len;
+            rl->rl_vlc[q][i].level= level;
+            rl->rl_vlc[q][i].run= run;
+        }
+    }
 }
 
 /* init vlcs */
@@ -1140,16 +2042,16 @@ void h263_decode_init_vlc(MpegEncContext *s)
     if (!done) {
         done = 1;
 
-        init_vlc(&intra_MCBPC_vlc, 6, 8, 
+        init_vlc(&intra_MCBPC_vlc, INTRA_MCBPC_VLC_BITS, 8, 
                  intra_MCBPC_bits, 1, 1,
                  intra_MCBPC_code, 1, 1);
-        init_vlc(&inter_MCBPC_vlc, 9, 25, 
+        init_vlc(&inter_MCBPC_vlc, INTER_MCBPC_VLC_BITS, 25, 
                  inter_MCBPC_bits, 1, 1,
                  inter_MCBPC_code, 1, 1);
-        init_vlc(&cbpy_vlc, 6, 16,
+        init_vlc(&cbpy_vlc, CBPY_VLC_BITS, 16,
                  &cbpy_tab[0][1], 2, 1,
                  &cbpy_tab[0][0], 2, 1);
-        init_vlc(&mv_vlc, 9, 33,
+        init_vlc(&mv_vlc, MV_VLC_BITS, 33,
                  &mvtab[0][1], 2, 1,
                  &mvtab[0][0], 2, 1);
         init_rl(&rl_inter);
@@ -1158,19 +2060,21 @@ void h263_decode_init_vlc(MpegEncContext *s)
         init_vlc_rl(&rl_inter);
         init_vlc_rl(&rl_intra);
         init_vlc_rl(&rl_intra_aic);
-        init_vlc(&dc_lum, 9, 13,
+        init_vlc(&dc_lum, DC_VLC_BITS, 10 /* 13 */,
                  &DCtab_lum[0][1], 2, 1,
                  &DCtab_lum[0][0], 2, 1);
-        init_vlc(&dc_chrom, 9, 13,
+        init_vlc(&dc_chrom, DC_VLC_BITS, 10 /* 13 */,
                  &DCtab_chrom[0][1], 2, 1,
                  &DCtab_chrom[0][0], 2, 1);
-        init_vlc(&sprite_trajectory, 9, 15,
+        init_vlc(&sprite_trajectory, SPRITE_TRAJ_VLC_BITS, 15,
                  &sprite_trajectory_tab[0][1], 4, 2,
                  &sprite_trajectory_tab[0][0], 4, 2);
-        init_vlc(&mb_type_b_vlc, 4, 4,
+        init_vlc(&mb_type_b_vlc, MB_TYPE_B_VLC_BITS, 4,
                  &mb_type_b_tab[0][1], 2, 1,
                  &mb_type_b_tab[0][0], 2, 1);
     }
+
+    s->progressive_sequence=1; // set to most likely for the case of incomplete headers
 }
 
 int h263_decode_gob_header(MpegEncContext *s)
@@ -1198,6 +2102,668 @@ int h263_decode_gob_header(MpegEncContext *s)
             
 }
 
+static inline void memsetw(short *tab, int val, int n) /* stores val into tab[0] .. tab[n-1] */
+{
+    int i;
+    for(i=0;i<n;i++) /* does nothing when n <= 0 */
+        tab[i] = val; /* val is converted to short by the assignment */
+}
+/**
+ * Initialises the two additional bit writers s->tex_pb and s->pb2 on
+ * s->tex_pb_buffer and s->pb2_buffer, passing PB_BUFFER_SIZE as the size.
+ * ff_mpeg4_merge_partitions() later appends both of them to s->pb.
+ */
+void ff_mpeg4_init_partitions(MpegEncContext *s)
+{
+    init_put_bits(&s->tex_pb, s->tex_pb_buffer, PB_BUFFER_SIZE, NULL, NULL);
+    init_put_bits(&s->pb2   , s->pb2_buffer   , PB_BUFFER_SIZE, NULL, NULL);
+}
+/**
+ * Joins the partition writers into the main bitstream s->pb.
+ * Writes the 19 bit DC_MARKER (I pictures) or the 17 bit MOTION_MARKER (all
+ * other picture types) to s->pb, updates the bit statistics, flushes s->pb2
+ * and s->tex_pb and then copies pb2 followed by tex_pb into s->pb.
+ * s->last_bits is set to the bit count of s->pb after the copy.
+ */
+void ff_mpeg4_merge_partitions(MpegEncContext *s)
+{
+    const int pb2_len   = get_bit_count(&s->pb2   );
+    const int tex_pb_len= get_bit_count(&s->tex_pb);
+    const int bits= get_bit_count(&s->pb); /* taken before the marker is written */
+
+    if(s->pict_type==I_TYPE){
+        put_bits(&s->pb, 19, DC_MARKER);
+        s->misc_bits+=19 + pb2_len + bits - s->last_bits; /* marker, pb2 and the bits added to s->pb since last_bits */
+        s->i_tex_bits+= tex_pb_len;
+    }else{
+        put_bits(&s->pb, 17, MOTION_MARKER);
+        s->misc_bits+=17 + pb2_len;; /* NOTE(review): the second ';' is a stray empty statement */
+        s->mv_bits+= bits - s->last_bits; /* here the bits added to s->pb since last_bits are counted as mv bits */
+        s->p_tex_bits+= tex_pb_len;
+    }
+
+    flush_put_bits(&s->pb2);
+    flush_put_bits(&s->tex_pb);
+
+    ff_copy_bits(&s->pb, s->pb2_buffer   , pb2_len);
+    ff_copy_bits(&s->pb, s->tex_pb_buffer, tex_pb_len);
+    s->last_bits= get_bit_count(&s->pb);
+}
+/**
+ * Writes a video packet header to s->pb: stuffing, a run of zero bits whose
+ * length depends on the picture type, a 1 bit, the number of the current
+ * macroblock, the 5 bit qscale and a 0 bit (no header extension).
+ */
+void ff_mpeg4_encode_video_packet_header(MpegEncContext *s)
+{
+    int mb_num_bits= av_log2(s->mb_num - 1) + 1; /* width of the macroblock number field */
+
+    ff_mpeg4_stuffing(&s->pb);
+    if(s->pict_type==I_TYPE)
+        put_bits(&s->pb, 16, 0);
+    else if(s->pict_type==B_TYPE)
+        put_bits(&s->pb, MAX(MAX(s->f_code, s->b_code)+15, 17), 0); /* at least 17 zero bits */
+    else /* S/P_TYPE */
+        put_bits(&s->pb, s->f_code+15, 0);
+    put_bits(&s->pb, 1, 1); /* the 1 bit that ends the run of zeros */
+    
+    put_bits(&s->pb, mb_num_bits, s->mb_x + s->mb_y*s->mb_width);
+    put_bits(&s->pb, 5, s->qscale);
+    put_bits(&s->pb, 1, 0); /* no HEC */
+}
+
+/**
+ * decodes the next video packet header from gb and sets
+ * s->next_resync_qscale (unless s->shape is BIN_ONLY_SHAPE).
+ * Layout as read here: 16 zero bits, a picture type dependent number of
+ * further zero bits ended by a 1 bit, the macroblock number, the 5 bit
+ * qscale and an optional header extension. The extension fields are read
+ * to advance gb, none of them is stored.
+ * returns mb_num of the next packet or <0 if something went wrong
+ */
+static int decode_video_packet_header(MpegEncContext *s, GetBitContext *gb)
+{
+    int bits;
+    int mb_num_bits= av_log2(s->mb_num - 1) + 1; /* width of the macroblock number field */
+    int header_extension=0, mb_num;
+//printf("%X\n", show_bits(&gb, 24));
+//printf("parse_video_packet_header\n");
+//    if(show_aligned_bits(gb, 1, 16) != 0) return -1;
+    
+    /* is there enough space left for a video packet + header */
+    if( get_bits_count(gb) > gb->size*8-20) return -1; /* assumes gb->size counts bytes -- TODO confirm */
+
+//printf("resync at %d %d\n", s->mb_x, s->mb_y);
+//    skip_bits(gb, 1);
+//    align_get_bits(gb);
+    if(get_bits(gb, 16)!=0){ /* NOTE(review): a mismatch is only logged, parsing continues */
+        printf("internal error while decoding video packet header\n");
+    }
+
+//printf("%X\n", show_bits(gb, 24));
+    bits=0;
+    while(!get_bits1(gb) && bits<30) bits++; /* counts the zero bits in front of the next 1 bit, at most 30 */
+    if((s->pict_type == P_TYPE || s->pict_type == S_TYPE) && bits != s->f_code-1){
+        printf("marker does not match f_code (is: %d should be: %d pos: %d end %d x: %d y: %d)\n", 
+               bits+1, s->f_code, get_bits_count(gb), gb->size*8, s->mb_x, s->mb_y);
+        return -1;
+    }else if(s->pict_type == I_TYPE && bits != 0){
+        printf("marker too long\n");
+        return -1;
+    }else if(s->pict_type == B_TYPE && bits != MAX(MAX(s->f_code, s->b_code)-1, 1)){
+        printf("marker does not match f/b_code\n");
+        return -1;
+    }
+//printf("%X\n", show_bits(gb, 24));
+
+    if(s->shape != RECT_SHAPE){ /* non rectangular shapes carry the extension flag in front of mb_num */
+        header_extension= get_bits1(gb);
+        //FIXME more stuff here
+    }
+
+    mb_num= get_bits(gb, mb_num_bits);
+    if(mb_num < s->mb_x + s->mb_y*s->mb_width || mb_num>=s->mb_num){ /* must not lie before the current macroblock nor past the last one */
+        fprintf(stderr, "illegal mb_num in video packet (%d %d) \n", mb_num, s->mb_x + s->mb_y*s->mb_width);
+        return -1;
+    }
+
+    if(s->shape != BIN_ONLY_SHAPE){
+        s->next_resync_qscale= get_bits(gb, 5);
+        if(s->next_resync_qscale==0) /* a coded 0 falls back to the current qscale */
+            s->next_resync_qscale= s->qscale;
+        if(s->next_resync_qscale==0){
+            fprintf(stderr, "qscale==0\n");
+            return -1;
+        }
+    }
+
+    if(s->shape == RECT_SHAPE){ /* rectangular shape: the extension flag follows the qscale */
+        header_extension= get_bits1(gb);
+    }
+    if(header_extension){
+        int time_increment; /* read below, not used afterwards */
+        int time_incr=0; /* counted below, not used afterwards */
+
+        while (get_bits1(gb) != 0) /* NOTE(review): no upper bound and no end of buffer check in this loop */
+            time_incr++;
+
+        check_marker(gb, "before time_increment in video packed header");
+        time_increment= get_bits(gb, s->time_increment_bits);
+        check_marker(gb, "before vop_coding_type in video packed header");
+        
+        skip_bits(gb, 2); /* vop coding type */
+        //FIXME not rect stuff here
+
+        if(s->shape != BIN_ONLY_SHAPE){
+            skip_bits(gb, 3); /* intra dc vlc threshold */
+
+            if(s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE && s->num_sprite_warping_points){
+                mpeg4_decode_sprite_trajectory(s); /* NOTE(review): gb is not passed here -- confirm which bit reader this call consumes */
+                fprintf(stderr, "untested\n");
+            }
+
+            //FIXME reduced res stuff here
+            
+            if (s->pict_type != I_TYPE) {
+                int f_code = get_bits(gb, 3);  /* fcode_for */
+                if(f_code==0){ /* only reported, the value is discarded either way */
+                    printf("Error, video packet header damaged (f_code=0)\n");
+                }
+            }
+            if (s->pict_type == B_TYPE) {
+                int b_code = get_bits(gb, 3);
+                if(b_code==0){ /* only reported, the value is discarded either way */
+                    printf("Error, video packet header damaged (b_code=0)\n");
+                }
+            }       
+        }
+    }
+    //FIXME new-pred stuff
+    
+//printf("parse ok %d %d %d %d\n", mb_num, s->mb_x + s->mb_y*s->mb_width, get_bits_count(gb), get_bits_count(&s->gb));
+
+    return mb_num;
+}
+/**
+ * Resets prediction state starting at the current macroblock position:
+ * DC values are set to 1024 and AC values to 0 for l_wrap*2+1 entries of
+ * plane 0 and c_wrap+1 entries of planes 1 and 2, and the x/y components of
+ * s->last_mv[0][0] and s->last_mv[1][0] are zeroed.
+ * The stored motion vectors (s->motion_val) are left untouched.
+ */
+void ff_mpeg4_clean_buffers(MpegEncContext *s)
+{
+    int c_wrap, c_xy, l_wrap, l_xy;
+
+    l_wrap= s->block_wrap[0];
+    l_xy= s->mb_y*l_wrap*2 + s->mb_x*2; /* two entries per macroblock in each direction */
+    c_wrap= s->block_wrap[4];
+    c_xy= s->mb_y*c_wrap + s->mb_x; /* one entry per macroblock */
+
+    /* clean DC */
+    memsetw(s->dc_val[0] + l_xy, 1024, l_wrap*2+1);
+    memsetw(s->dc_val[1] + c_xy, 1024, c_wrap+1);
+    memsetw(s->dc_val[2] + c_xy, 1024, c_wrap+1);
+
+    /* clean AC */
+    memset(s->ac_val[0] + l_xy, 0, (l_wrap*2+1)*16*sizeof(INT16)); /* sized as 16 INT16 values per entry */
+    memset(s->ac_val[1] + c_xy, 0, (c_wrap  +1)*16*sizeof(INT16));
+    memset(s->ac_val[2] + c_xy, 0, (c_wrap  +1)*16*sizeof(INT16));
+
+    /* clean MV */
+    // we can't clear the MVs as they might be needed by a b frame
+//    memset(s->motion_val + l_xy, 0, (l_wrap*2+1)*2*sizeof(INT16));
+//    memset(s->motion_val, 0, 2*sizeof(INT16)*(2 + s->mb_width*2)*(2 + s->mb_height*2));
+    s->last_mv[0][0][0]=
+    s->last_mv[0][0][1]=
+    s->last_mv[1][0][0]=
+    s->last_mv[1][0][1]= 0;
+}
+
+/* searches for the next resync marker and sets s->next_resync_gb, s->mb_num_left
+ * The search starts byte aligned at s->next_resync_gb and moves on 8 bits at
+ * a time. Wherever the upper 16 of the next 24 bits are zero,
+ * decode_video_packet_header() is tried.
+ * returns 0 if a header was parsed: s->next_resync_pos holds its position,
+ * s->next_resync_gb points behind it and s->mb_num_left is the distance from
+ * the current macroblock to the one named in the header.
+ * returns -1 if a start code (24 bit value 1) or the end of the buffer is
+ * reached first: s->mb_num_left then covers the rest of the picture and
+ * s->gb is set to s->next_resync_gb.
+ * NOTE(review): nothing in this body clears ac, dc or mv state, that is left
+ * to other code (see ff_mpeg4_clean_buffers()) -- confirm against the caller. */
+int ff_mpeg4_resync(MpegEncContext *s)
+{
+    GetBitContext gb;
+    
+    /* search & parse next resync marker */
+    
+    gb= s->next_resync_gb; /* work on a copy, s->next_resync_gb only changes on success */
+    align_get_bits(&gb);
+//printf("mpeg4_resync %d next:%d \n", get_bits_count(&gb), get_bits_count(&s->next_resync_gb));
+    for(;;) {
+        int v= show_bits(&gb, 24);
+        if( get_bits_count(&gb) >= gb.size*8-24 || v == 1 /* start-code */){
+            s->mb_num_left= s->mb_num - s->mb_x - s->mb_y*s->mb_width;
+//printf("mpeg4_resync end\n");
+            s->gb= s->next_resync_gb; //continue at the next resync marker
+            return -1;
+        }else if(v>>8 == 0){ /* 16 zero bits: candidate for a video packet header */
+            int next;
+            s->next_resync_pos= get_bits_count(&gb);
+            
+            next= decode_video_packet_header(s, &gb);
+            if(next >= 0){
+                s->mb_num_left= next - s->mb_x - s->mb_y*s->mb_width;
+                break;
+            }
+
+            align_get_bits(&gb); /* the failed parse may have stopped in the middle of a byte */
+        }
+        skip_bits(&gb, 8);
+    }
+    s->next_resync_gb=gb;
+    
+    return 0;
+}
+/**
+ * Sets s->block_index[0..5] from s->mb_x, s->mb_y, s->mb_height and
+ * s->block_wrap. Entries 0-3 use block_wrap[0] with two rows and two columns
+ * per macroblock, entries 4 and 5 use block_wrap[4] with one entry per
+ * macroblock and are offset behind the area addressed by entries 0-3
+ * (presumably luma vs. the two chroma planes -- TODO confirm).
+ * In ff_mpeg4_decode_partitions() update_block_index() is called once before
+ * the indexes are used.
+ */
+static inline void init_block_index(MpegEncContext *s)
+{
+    s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
+    s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1)     + s->mb_x*2;
+    s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1 + s->mb_x*2;
+    s->block_index[3]= s->block_wrap[0]*(s->mb_y*2 + 2)     + s->mb_x*2;
+    s->block_index[4]= s->block_wrap[4]*(s->mb_y + 1)                    + s->block_wrap[0]*(s->mb_height*2 + 2) + s->mb_x;
+    s->block_index[5]= s->block_wrap[4]*(s->mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2) + s->mb_x;
+}
+/**
+ * Moves s->block_index one macroblock to the right: entries 0-3 advance by
+ * 2, entries 4 and 5 advance by 1.
+ */
+static inline void update_block_index(MpegEncContext *s)
+{
+    s->block_index[0]+=2;
+    s->block_index[1]+=2;
+    s->block_index[2]+=2;
+    s->block_index[3]+=2;
+    s->block_index[4]++;
+    s->block_index[5]++;
+}
+
+/**
+ * decodes the first & second partition
+ * Both passes walk the s->mb_num_left macroblocks that start at
+ * (s->resync_mb_x, s->resync_mb_y) and read from s->gb.
+ * The first pass stores per macroblock data in s->mb_type, s->cbp_table,
+ * s->mbintra_table and, depending on the picture type, s->qscale_table and
+ * s->pred_dir_table (I) or s->motion_val (P/S). It must be followed by
+ * DC_MARKER (19 bits, I pictures) or MOTION_MARKER (17 bits, otherwise).
+ * The second pass completes s->cbp_table, s->pred_dir_table and
+ * s->qscale_table.
+ * s->mb_x, s->mb_y and s->qscale are left modified, see
+ * mpeg4_decode_partitioned_mb() for how the caller restores them.
+ * returns error type or 0 if no error
+ * (DECODING_DESYNC, DECODING_AC_LOST or DECODING_ACDC_LOST)
+ */
+int ff_mpeg4_decode_partitions(MpegEncContext *s)
+{
+    static const INT8 quant_tab[4] = { -1, -2, 1, 2 }; /* qscale change selected by the 2 bit dquant field */
+    int mb_num;
+    
+    /* decode first partition */
+    mb_num=0;
+    s->first_slice_line=1;
+    s->mb_x= s->resync_mb_x;
+    for(s->mb_y= s->resync_mb_y; mb_num < s->mb_num_left; s->mb_y++){
+        init_block_index(s);
+        for(; mb_num < s->mb_num_left && s->mb_x<s->mb_width; s->mb_x++){
+            const int xy= s->mb_x + s->mb_y*s->mb_width;
+            int cbpc;
+            int dir=0; /* collects one dc prediction direction bit per block */
+            
+            mb_num++;
+            update_block_index(s);
+            if(s->mb_x == s->resync_mb_x && s->mb_y == s->resync_mb_y+1)
+                s->first_slice_line=0;
+            
+            if(s->mb_x==0) PRINT_MB_TYPE("\n");
+
+            if(s->pict_type==I_TYPE){
+                int i;
+
+                PRINT_MB_TYPE("I");
+                cbpc = get_vlc2(&s->gb, intra_MCBPC_vlc.table, INTRA_MCBPC_VLC_BITS, 1);
+                if (cbpc < 0){
+                    fprintf(stderr, "cbpc corrupted at %d %d\n", s->mb_x, s->mb_y);
+                    return DECODING_DESYNC;
+                }
+                s->cbp_table[xy]= cbpc & 3;
+                s->mb_type[xy]= MB_TYPE_INTRA;
+                s->mb_intra = 1;
+
+                if(cbpc & 4) { /* bit 2 of cbpc signals a dquant field */
+                    change_qscale(s, quant_tab[get_bits(&s->gb, 2)]);
+                }
+                s->qscale_table[xy]= s->qscale;
+
+                s->mbintra_table[xy]= 1;
+                for(i=0; i<6; i++){
+                    int dc_pred_dir;
+                    int dc= mpeg4_decode_dc(s, i, &dc_pred_dir); 
+                    if(dc < 0){
+                        fprintf(stderr, "DC corrupted at %d %d\n", s->mb_x, s->mb_y);
+                        return DECODING_DESYNC;
+                    }
+                    dir<<=1;
+                    if(dc_pred_dir) dir|=1;
+                }
+                s->pred_dir_table[xy]= dir; /* bit 7 (ac_pred) is added by the second pass */
+            }else{ /* P/S_TYPE */
+                int mx, my, pred_x, pred_y;
+                INT16 * const mot_val= s->motion_val[s->block_index[0]];
+                const int stride= s->block_wrap[0]*2; /* offset in INT16 units to the vectors one block row below */
+
+                if(get_bits1(&s->gb)){
+                    /* skip mb */
+                    s->mb_type[xy]= MB_TYPE_SKIPED;
+                    if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE){
+                        const int a= s->sprite_warping_accuracy;
+                        PRINT_MB_TYPE("G");
+                        if(s->divx_version==500 && s->divx_build==413){ /* this build gets plain division instead of RSHIFT and no MB_TYPE_GMC flag */
+                            mx = s->sprite_offset[0][0] / (1<<(a-s->quarter_sample));
+                            my = s->sprite_offset[0][1] / (1<<(a-s->quarter_sample));
+                        }else{
+                            mx = RSHIFT(s->sprite_offset[0][0], a-s->quarter_sample);
+                            my = RSHIFT(s->sprite_offset[0][1], a-s->quarter_sample);
+                            s->mb_type[xy]= MB_TYPE_GMC | MB_TYPE_SKIPED;
+                        }
+                    }else{
+                        PRINT_MB_TYPE("S");
+                        mx = 0;
+                        my = 0;
+                    }
+                    mot_val[0       ]= mot_val[2       ]=
+                    mot_val[0+stride]= mot_val[2+stride]= mx; /* same vector for all four blocks of the macroblock */
+                    mot_val[1       ]= mot_val[3       ]=
+                    mot_val[1+stride]= mot_val[3+stride]= my;
+
+                    if(s->mbintra_table[xy])
+                        ff_clean_intra_table_entries(s);
+
+                    continue;
+                }
+                cbpc = get_vlc2(&s->gb, inter_MCBPC_vlc.table, INTER_MCBPC_VLC_BITS, 2);
+                if (cbpc < 0){
+                    fprintf(stderr, "cbpc corrupted at %d %d\n", s->mb_x, s->mb_y);
+                    return DECODING_DESYNC;
+                }
+                if (cbpc > 20)
+                    cbpc+=3;
+                else if (cbpc == 20) /* NOTE(review): stuffing is only reported, decoding continues with cbpc 20 */
+                    fprintf(stderr, "Stuffing !");
+                s->cbp_table[xy]= cbpc&(8+3); //8 is dquant
+    
+                s->mb_intra = ((cbpc & 4) != 0);
+        
+                if(s->mb_intra){
+                    PRINT_MB_TYPE("I");
+                    s->mbintra_table[xy]= 1;
+                    s->mb_type[xy]= MB_TYPE_INTRA;
+                    mot_val[0       ]= mot_val[2       ]= 
+                    mot_val[0+stride]= mot_val[2+stride]= 0;
+                    mot_val[1       ]= mot_val[3       ]=
+                    mot_val[1+stride]= mot_val[3+stride]= 0;
+                }else{
+                    if(s->mbintra_table[xy])
+                        ff_clean_intra_table_entries(s);
+
+                    if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE && (cbpc & 16) == 0)
+                        s->mcsel= get_bits1(&s->gb);
+                    else s->mcsel= 0;
+        
+                    if ((cbpc & 16) == 0) {
+                        PRINT_MB_TYPE("P");
+                        /* 16x16 motion prediction */
+                        s->mb_type[xy]= MB_TYPE_INTER;
+
+                        h263_pred_motion(s, 0, &pred_x, &pred_y);
+                        if(!s->mcsel)
+                           mx = h263_decode_motion(s, pred_x, s->f_code);
+                        else {
+                            const int a= s->sprite_warping_accuracy;
+                            if(s->divx_version==500 && s->divx_build==413){
+                                mx = s->sprite_offset[0][0] / (1<<(a-s->quarter_sample));
+                            }else{
+                                mx = RSHIFT(s->sprite_offset[0][0], a-s->quarter_sample);
+                            }
+                        }
+                        if (mx >= 0xffff) /* values >= 0xffff are treated as a decoding failure */
+                            return DECODING_DESYNC;
+            
+                        if(!s->mcsel)
+                           my = h263_decode_motion(s, pred_y, s->f_code);
+                        else{
+                           const int a= s->sprite_warping_accuracy;
+                            if(s->divx_version==500 && s->divx_build==413){
+                                my = s->sprite_offset[0][1] / (1<<(a-s->quarter_sample));
+                            }else{
+                                my = RSHIFT(s->sprite_offset[0][1], a-s->quarter_sample);
+                            }
+                        }
+                        if (my >= 0xffff)
+                            return DECODING_DESYNC;
+                        mot_val[0       ]= mot_val[2       ] =
+                        mot_val[0+stride]= mot_val[2+stride]= mx;
+                        mot_val[1       ]= mot_val[3       ]=
+                        mot_val[1+stride]= mot_val[3+stride]= my;
+                    } else {
+                        int i;
+                        PRINT_MB_TYPE("4");
+                        s->mb_type[xy]= MB_TYPE_INTER4V;
+                        for(i=0;i<4;i++) {
+                            INT16 *mot_val= h263_pred_motion(s, i, &pred_x, &pred_y); /* shadows the outer mot_val: one vector per block */
+                            mx = h263_decode_motion(s, pred_x, s->f_code);
+                            if (mx >= 0xffff)
+                                return DECODING_DESYNC;
+                
+                            my = h263_decode_motion(s, pred_y, s->f_code);
+                            if (my >= 0xffff)
+                                return DECODING_DESYNC;
+                            mot_val[0] = mx;
+                            mot_val[1] = my;
+                        }
+                    }
+                }
+            }
+        }
+        s->mb_x= 0;
+    }
+
+    if     (s->pict_type==I_TYPE && get_bits(&s->gb, 19)!=DC_MARKER    ) s->decoding_error= DECODING_DESYNC;
+    else if(s->pict_type!=I_TYPE && get_bits(&s->gb, 17)!=MOTION_MARKER) s->decoding_error= DECODING_DESYNC;
+    if(s->decoding_error== DECODING_DESYNC){
+        fprintf(stderr, "marker missing after first partition at %d %d\n", s->mb_x, s->mb_y);
+        return DECODING_DESYNC;
+    }
+
+    /* decode second partition */
+    mb_num=0;
+    s->mb_x= s->resync_mb_x;
+    for(s->mb_y= s->resync_mb_y; mb_num < s->mb_num_left; s->mb_y++){
+        init_block_index(s);
+        for(; mb_num < s->mb_num_left && s->mb_x<s->mb_width; s->mb_x++){
+            const int xy= s->mb_x + s->mb_y*s->mb_width;
+
+            mb_num++;
+            update_block_index(s);
+            
+            if(s->pict_type==I_TYPE){
+                int ac_pred= get_bits1(&s->gb);
+                int cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
+                if(cbpy<0){
+                    fprintf(stderr, "cbpy corrupted at %d %d\n", s->mb_x, s->mb_y);
+                    return DECODING_AC_LOST;
+                }
+                
+                s->cbp_table[xy]|= cbpy<<2; /* the low 2 bits were stored by the first pass */
+                s->pred_dir_table[xy]|= ac_pred<<7;
+            }else{ /* P || S_TYPE */
+                if(s->mb_type[xy]&MB_TYPE_INTRA){          
+                    int dir=0,i;
+                    int ac_pred = get_bits1(&s->gb);
+                    int cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
+
+                    if(cbpy<0){
+                        fprintf(stderr, "I cbpy corrupted at %d %d\n", s->mb_x, s->mb_y);
+                        return DECODING_ACDC_LOST;
+                    }
+                    
+                    if(s->cbp_table[xy] & 8) { /* dquant flag kept from the first pass */
+                        change_qscale(s, quant_tab[get_bits(&s->gb, 2)]);
+                    }
+                    s->qscale_table[xy]= s->qscale;
+
+                    for(i=0; i<6; i++){
+                        int dc_pred_dir;
+                        int dc= mpeg4_decode_dc(s, i, &dc_pred_dir); 
+                        if(dc < 0){
+                            fprintf(stderr, "DC corrupted at %d %d\n", s->mb_x, s->mb_y);
+                            return DECODING_ACDC_LOST;
+                        }
+                        dir<<=1;
+                        if(dc_pred_dir) dir|=1;
+                    }
+                    s->cbp_table[xy]&= 3; //remove dquant
+                    s->cbp_table[xy]|= cbpy<<2;
+                    s->pred_dir_table[xy]= dir | (ac_pred<<7);
+                }else if(s->mb_type[xy]&MB_TYPE_SKIPED){
+                    s->qscale_table[xy]= s->qscale;
+                    s->cbp_table[xy]= 0;
+                }else{
+                    int cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
+
+                    if(cbpy<0){
+                        fprintf(stderr, "P cbpy corrupted at %d %d\n", s->mb_x, s->mb_y);
+                        return DECODING_ACDC_LOST;
+                    }
+                    
+                    if(s->cbp_table[xy] & 8) {
+                        change_qscale(s, quant_tab[get_bits(&s->gb, 2)]);
+                    }
+                    s->qscale_table[xy]= s->qscale;
+
+                    s->cbp_table[xy]&= 3; //remove dquant
+                    s->cbp_table[xy]|= (cbpy^0xf)<<2; /* cbpy is stored inverted for non intra macroblocks */
+                }
+            }
+        }
+        s->mb_x= 0;
+    }
+    
+
+    return 0;        
+}
+/**
+ * Decodes the macroblock at (s->mb_x, s->mb_y) of a data partitioned picture.
+ * At the first macroblock of a packet (position equal to s->resync_mb_x/y)
+ * ff_mpeg4_decode_partitions() is run for the whole packet; block_index,
+ * the position, first_slice_line and qscale are restored afterwards.
+ * The macroblock itself is then decoded from the tables filled by that call
+ * (mb_type, cbp_table, qscale_table, pred_dir_table, motion_val) plus the
+ * coefficients read by mpeg4_decode_block().
+ * returns 0, or -1 if the partitions were desynchronised or the dc of an
+ * I picture block could not be decoded
+ */
+static int mpeg4_decode_partitioned_mb(MpegEncContext *s,
+                   DCTELEM block[6][64])
+{
+    int cbp, mb_type;
+    const int xy= s->mb_x + s->mb_y*s->mb_width;
+
+    if(s->mb_x==s->resync_mb_x && s->mb_y==s->resync_mb_y){ //Note resync_mb_{x,y}==0 at the start
+        int i;
+        int block_index_backup[6];
+        int qscale= s->qscale; /* ff_mpeg4_decode_partitions() changes s->qscale while parsing dquant */
+        
+        for(i=0; i<6; i++) block_index_backup[i]= s->block_index[i];
+        
+        s->decoding_error= ff_mpeg4_decode_partitions(s);
+        
+        for(i=0; i<6; i++) s->block_index[i]= block_index_backup[i];
+        s->first_slice_line=1;
+        s->mb_x= s->resync_mb_x;
+        s->mb_y= s->resync_mb_y;
+        s->qscale= qscale;
+        s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
+        s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
+
+        if(s->decoding_error==DECODING_DESYNC) return -1;
+    }
+    
+    mb_type= s->mb_type[xy];
+    if(s->decoding_error) /* after any error no coefficients are decoded any more */
+        cbp=0;
+    else 
+        cbp = s->cbp_table[xy];
+
+    if(s->decoding_error!=DECODING_ACDC_LOST && s->qscale_table[xy] != s->qscale){
+        s->qscale= s->qscale_table[xy];
+        s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
+        s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
+    }
+
+    if (s->pict_type == P_TYPE || s->pict_type==S_TYPE) {
+        int i;
+        for(i=0; i<4; i++){ /* motion vectors were stored by the first partition */
+            s->mv[0][i][0] = s->motion_val[ s->block_index[i] ][0];
+            s->mv[0][i][1] = s->motion_val[ s->block_index[i] ][1];
+        }
+        s->mb_intra = mb_type&MB_TYPE_INTRA;
+
+        if (mb_type&MB_TYPE_SKIPED) {
+            /* skip mb */
+            for(i=0;i<6;i++)
+                s->block_last_index[i] = -1;
+            s->mv_dir = MV_DIR_FORWARD;
+            s->mv_type = MV_TYPE_16X16;
+            if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE){
+                s->mcsel=1;
+                s->mb_skiped = 0;
+            }else{
+                s->mcsel=0;
+                s->mb_skiped = 1;
+            }
+            return 0;
+        }else if(s->mb_intra && s->decoding_error!=DECODING_ACDC_LOST){
+            s->ac_pred = s->pred_dir_table[xy]>>7;
+
+            /* decode each block */
+            for (i = 0; i < 6; i++) {
+                int ret= mpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, 1);
+                if(ret==DECODING_AC_LOST){
+                    fprintf(stderr, "texture corrupted at %d %d (trying to continue with mc/dc only)\n", s->mb_x, s->mb_y);
+                    s->decoding_error=DECODING_AC_LOST;
+                    cbp=0; /* the loop goes on, the remaining blocks are decoded as not coded */
+                }else if(ret==DECODING_ACDC_LOST){
+                    fprintf(stderr, "dc corrupted at %d %d (trying to continue with mc only)\n", s->mb_x, s->mb_y);
+                    s->decoding_error=DECODING_ACDC_LOST;
+                    break;
+                }
+            }
+        }else if(!s->mb_intra){
+//            s->mcsel= 0; //FIXME do we need to init that
+            
+            s->mv_dir = MV_DIR_FORWARD;
+            if (mb_type&MB_TYPE_INTER4V) {
+                s->mv_type = MV_TYPE_8X8;
+            } else {
+                s->mv_type = MV_TYPE_16X16;
+            }
+            if(s->decoding_error==0 && cbp){
+                /* decode each block */
+                for (i = 0; i < 6; i++) {
+                    int ret= mpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, 0);
+                    if(ret==DECODING_AC_LOST){
+                        fprintf(stderr, "texture corrupted at %d %d (trying to continue with mc/dc only)\n", s->mb_x, s->mb_y);
+                        s->decoding_error=DECODING_AC_LOST;
+                        break;
+                    }
+                }
+            }
+        }
+    } else { /* I-Frame */
+        int i;
+        s->mb_intra = 1;
+        s->ac_pred = s->pred_dir_table[xy]>>7;
+        
+        /* decode each block */
+        for (i = 0; i < 6; i++) {
+            int ret= mpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, 1);
+            if(ret==DECODING_AC_LOST){
+                fprintf(stderr, "texture corrupted at %d %d (trying to continue with dc only)\n", s->mb_x, s->mb_y);
+                s->decoding_error=DECODING_AC_LOST;
+                cbp=0; /* the loop goes on, the remaining blocks are decoded as not coded */
+            }else if(ret==DECODING_ACDC_LOST){
+                fprintf(stderr, "dc corrupted at %d %d\n", s->mb_x, s->mb_y);
+                return -1;
+            }
+        }
+    }
+
+    return 0;
+}
+#if 0 /* compiled out: reads the per macroblock interlacing fields from s->gb */
+static inline void decode_interlaced_info(MpegEncContext *s, int cbp, int mb_type){
+    s->mv_type= 0;            
+    if(!s->progressive_sequence){
+        if(cbp || s->mb_intra) /* the dct type flag is only read for intra or coded macroblocks */
+            s->interlaced_dct= get_bits1(&s->gb);
+        
+        if(!s->mb_intra){
+            if(   s->pict_type==P_TYPE //FIXME check that 4MV is forbidden
+               || (s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE && !s->mcsel)
+               || (s->pict_type==B_TYPE && mb_type!=0) ){
+
+                if(get_bits1(&s->gb)){ /* flag selecting field motion vectors */
+                    s->mv_type= MV_TYPE_FIELD;
+
+                    if(   s->pict_type==P_TYPE
+                       || (s->pict_type==B_TYPE && mb_type!=2)){
+                        s->field_select[0][0]= get_bits1(&s->gb);
+                        s->field_select[0][1]= get_bits1(&s->gb);
+                    }
+                    if(s->pict_type==B_TYPE && mb_type!=3){
+                        s->field_select[1][0]= get_bits1(&s->gb);
+                        s->field_select[1][1]= get_bits1(&s->gb);
+                    }
+                }else
+                    s->mv_type= 0;            
+            }
+        }   
+    }
+}
+#endif
+
 int h263_decode_mb(MpegEncContext *s,
                    DCTELEM block[6][64])
 {
@@ -1205,6 +2771,17 @@ int h263_decode_mb(MpegEncContext *s,
     INT16 *mot_val;
     static INT8 quant_tab[4] = { -1, -2, 1, 2 };
 
+    if(s->mb_x==0) PRINT_MB_TYPE("\n");
+
+    if(s->resync_marker){
+        if(s->resync_mb_x == s->mb_x && s->resync_mb_y+1 == s->mb_y){
+            s->first_slice_line=0; 
+        }
+    }
+
+    if(s->data_partitioning && s->pict_type!=B_TYPE)
+        return mpeg4_decode_partitioned_mb(s, block);
+
     if (s->pict_type == P_TYPE || s->pict_type==S_TYPE) {
         if (get_bits1(&s->gb)) {
             /* skip mb */
@@ -1216,10 +2793,15 @@ int h263_decode_mb(MpegEncContext *s,
             if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE){
                 const int a= s->sprite_warping_accuracy;
 //                int l = (1 << (s->f_code - 1)) * 32;
-
+                PRINT_MB_TYPE("G");
                 s->mcsel=1;
-                s->mv[0][0][0] = RSHIFT(s->sprite_offset[0][0], a-s->quarter_sample);
-                s->mv[0][0][1] = RSHIFT(s->sprite_offset[0][1], a-s->quarter_sample);
+                if(s->divx_version==500 && s->divx_build==413){
+                    s->mv[0][0][0] = s->sprite_offset[0][0] / (1<<(a-s->quarter_sample));
+                    s->mv[0][0][1] = s->sprite_offset[0][1] / (1<<(a-s->quarter_sample));
+                }else{
+                    s->mv[0][0][0] = RSHIFT(s->sprite_offset[0][0], a-s->quarter_sample);
+                    s->mv[0][0][1] = RSHIFT(s->sprite_offset[0][1], a-s->quarter_sample);
+                }
 /*                if (s->mv[0][0][0] < -l) s->mv[0][0][0]= -l;
                 else if (s->mv[0][0][0] >= l) s->mv[0][0][0]= l-1;
                 if (s->mv[0][0][1] < -l) s->mv[0][0][1]= -l;
@@ -1227,6 +2809,7 @@ int h263_decode_mb(MpegEncContext *s,
 
                 s->mb_skiped = 0;
             }else{
+                PRINT_MB_TYPE("S");
                 s->mcsel=0;
                 s->mv[0][0][0] = 0;
                 s->mv[0][0][1] = 0;
@@ -1234,7 +2817,7 @@ int h263_decode_mb(MpegEncContext *s,
             }
             return 0;
         }
-        cbpc = get_vlc(&s->gb, &inter_MCBPC_vlc);
+        cbpc = get_vlc2(&s->gb, inter_MCBPC_vlc.table, INTER_MCBPC_VLC_BITS, 2);
         //fprintf(stderr, "\tCBPC: %d", cbpc);
         if (cbpc < 0)
             return -1;
@@ -1250,56 +2833,82 @@ int h263_decode_mb(MpegEncContext *s,
         if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE && (cbpc & 16) == 0)
             s->mcsel= get_bits1(&s->gb);
         else s->mcsel= 0;
-        cbpy = get_vlc(&s->gb, &cbpy_vlc);
+        cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
         cbp = (cbpc & 3) | ((cbpy ^ 0xf) << 2);
         if (dquant) {
-            s->qscale += quant_tab[get_bits(&s->gb, 2)];
-            if (s->qscale < 1)
-                s->qscale = 1;
-            else if (s->qscale > 31)
-                s->qscale = 31;
+            change_qscale(s, quant_tab[get_bits(&s->gb, 2)]);
         }
+        if((!s->progressive_sequence) && (cbp || s->workaround_bugs==2))
+            s->interlaced_dct= get_bits1(&s->gb);
+        
         s->mv_dir = MV_DIR_FORWARD;
         if ((cbpc & 16) == 0) {
-            /* 16x16 motion prediction */
-            s->mv_type = MV_TYPE_16X16;
-            h263_pred_motion(s, 0, &pred_x, &pred_y);
-            if (s->umvplus_dec)
-               mx = h263p_decode_umotion(s, pred_x);
-            else if(!s->mcsel)
-               mx = h263_decode_motion(s, pred_x, s->f_code);
-            else {
-               const int a= s->sprite_warping_accuracy;
+            if(s->mcsel){
+                const int a= s->sprite_warping_accuracy;
+                PRINT_MB_TYPE("G");
+                /* 16x16 global motion prediction */
+                s->mv_type = MV_TYPE_16X16;
 //        int l = (1 << (s->f_code - 1)) * 32;
-               mx= RSHIFT(s->sprite_offset[0][0], a-s->quarter_sample);
-//        if (mx < -l) mx= -l;
-//        else if (mx >= l) mx= l-1;
-            }
-            if (mx >= 0xffff)
-                return -1;
-            
-            if (s->umvplus_dec)
-               my = h263p_decode_umotion(s, pred_y);
-            else if(!s->mcsel)
-               my = h263_decode_motion(s, pred_y, s->f_code);
-            else{
-               const int a= s->sprite_warping_accuracy;
+                if(s->divx_version==500 && s->divx_build==413){
+                    mx = s->sprite_offset[0][0] / (1<<(a-s->quarter_sample));
+                    my = s->sprite_offset[0][1] / (1<<(a-s->quarter_sample));
+                }else{
+                    mx = RSHIFT(s->sprite_offset[0][0], a-s->quarter_sample);
+                    my = RSHIFT(s->sprite_offset[0][1], a-s->quarter_sample);
+                }
 //       int l = (1 << (s->f_code - 1)) * 32;
-               my= RSHIFT(s->sprite_offset[0][1], a-s->quarter_sample);
-//       if (my < -l) my= -l;
-//       else if (my >= l) my= l-1;
+                s->mv[0][0][0] = mx;
+                s->mv[0][0][1] = my;
+            }else if((!s->progressive_sequence) && get_bits1(&s->gb)){
+                PRINT_MB_TYPE("f");
+                /* 16x8 field motion prediction */
+                s->mv_type= MV_TYPE_FIELD;
+
+                s->field_select[0][0]= get_bits1(&s->gb);
+                s->field_select[0][1]= get_bits1(&s->gb);
+
+                h263_pred_motion(s, 0, &pred_x, &pred_y);
+                
+                for(i=0; i<2; i++){
+                    mx = h263_decode_motion(s, pred_x, s->f_code);
+                    if (mx >= 0xffff)
+                        return -1;
+            
+                    my = h263_decode_motion(s, pred_y/2, s->f_code);
+                    if (my >= 0xffff)
+                        return -1;
+
+                    s->mv[0][i][0] = mx;
+                    s->mv[0][i][1] = my;
+                }
+            }else{
+                PRINT_MB_TYPE("P");
+                /* 16x16 motion prediction */
+                s->mv_type = MV_TYPE_16X16;
+                h263_pred_motion(s, 0, &pred_x, &pred_y);
+                if (s->umvplus_dec)
+                   mx = h263p_decode_umotion(s, pred_x);
+                else
+                   mx = h263_decode_motion(s, pred_x, s->f_code);
+            
+                if (mx >= 0xffff)
+                    return -1;
+            
+                if (s->umvplus_dec)
+                   my = h263p_decode_umotion(s, pred_y);
+                else
+                   my = h263_decode_motion(s, pred_y, s->f_code);
+            
+                if (my >= 0xffff)
+                    return -1;
+                s->mv[0][0][0] = mx;
+                s->mv[0][0][1] = my;
+
+                if (s->umvplus_dec && (mx - pred_x) == 1 && (my - pred_y) == 1)
+                   skip_bits1(&s->gb); /* Bit stuffing to prevent PSC */                   
             }
-            if (my >= 0xffff)
-                return -1;
-            s->mv[0][0][0] = mx;
-            s->mv[0][0][1] = my;
-            /*fprintf(stderr, "\n MB %d", (s->mb_y * s->mb_width) + s->mb_x);
-            fprintf(stderr, "\n\tmvx: %d\t\tpredx: %d", mx, pred_x);
-            fprintf(stderr, "\n\tmvy: %d\t\tpredy: %d", my, pred_y);*/
-            if (s->umvplus_dec && (mx - pred_x) == 1 && (my - pred_y) == 1)
-               skip_bits1(&s->gb); /* Bit stuffing to prevent PSC */
-                           
         } else {
+            PRINT_MB_TYPE("4");
             s->mv_type = MV_TYPE_8X8;
             for(i=0;i<4;i++) {
                 mot_val = h263_pred_motion(s, i, &pred_x, &pred_y);
@@ -1328,18 +2937,20 @@ int h263_decode_mb(MpegEncContext *s,
         int modb1; // first bit of modb
         int modb2; // second bit of modb
         int mb_type;
-        int time_pp;
-        int time_pb;
+        uint16_t time_pp;
+        uint16_t time_pb;
         int xy;
 
         s->mb_intra = 0; //B-frames never contain intra blocks
         s->mcsel=0;      //     ...               true gmc blocks
 
         if(s->mb_x==0){
-            s->last_mv[0][0][0]= 
-            s->last_mv[0][0][1]= 
-            s->last_mv[1][0][0]= 
-            s->last_mv[1][0][1]= 0;
+            for(i=0; i<2; i++){
+                s->last_mv[i][0][0]= 
+                s->last_mv[i][0][1]= 
+                s->last_mv[i][1][0]= 
+                s->last_mv[i][1][1]= 0;
+            }
         }
 
         /* if we skipped it in the future P Frame than skip it now too */
@@ -1356,89 +2967,167 @@ int h263_decode_mb(MpegEncContext *s,
             s->mv[0][0][1] = 0;
             s->mv[1][0][0] = 0;
             s->mv[1][0][1] = 0;
-            s->last_mv[0][0][0]=
-            s->last_mv[0][0][1]= 
-            s->last_mv[1][0][0]= 
-            s->last_mv[1][0][1]= 0;
-            s->mb_skiped = 1;
+            PRINT_MB_TYPE("s");
             return 0;
         }
 
-        modb1= get_bits1(&s->gb);
-        if(modb1==0){
+        modb1= get_bits1(&s->gb); 
+        if(modb1){
+            mb_type=4; //like MB_TYPE_B_DIRECT but no vectors coded
+            cbp=0;
+        }else{
+            int field_mv;
+        
             modb2= get_bits1(&s->gb);
-            mb_type= get_vlc(&s->gb, &mb_type_b_vlc);
-            if(modb2==0) cbp= get_bits(&s->gb, 6);
-            else cbp=0;
-            if (mb_type && cbp) {
+            mb_type= get_vlc2(&s->gb, mb_type_b_vlc.table, MB_TYPE_B_VLC_BITS, 1);
+            if(modb2) cbp= 0;
+            else      cbp= get_bits(&s->gb, 6);
+
+            if (mb_type!=MB_TYPE_B_DIRECT && cbp) {
                 if(get_bits1(&s->gb)){
-                    s->qscale +=get_bits1(&s->gb)*4 - 2;
-                    if (s->qscale < 1)
-                        s->qscale = 1;
-                    else if (s->qscale > 31)
-                        s->qscale = 31;
+                    change_qscale(s, get_bits1(&s->gb)*4 - 2);
                 }
             }
-        }else{
-            mb_type=4; //like 0 but no vectors coded
-            cbp=0;
+            field_mv=0;
+
+            if(!s->progressive_sequence){
+                if(cbp)
+                    s->interlaced_dct= get_bits1(&s->gb);
+
+                if(mb_type!=MB_TYPE_B_DIRECT && get_bits1(&s->gb)){
+                    field_mv=1;
+
+                    if(mb_type!=MB_TYPE_B_BACKW){
+                        s->field_select[0][0]= get_bits1(&s->gb);
+                        s->field_select[0][1]= get_bits1(&s->gb);
+                    }
+                    if(mb_type!=MB_TYPE_B_FORW){
+                        s->field_select[1][0]= get_bits1(&s->gb);
+                        s->field_select[1][1]= get_bits1(&s->gb);
+                    }
+                }
+            }
+
+            s->mv_dir = 0;
+            if(mb_type!=MB_TYPE_B_DIRECT && !field_mv){
+                s->mv_type= MV_TYPE_16X16;
+                if(mb_type!=MB_TYPE_B_BACKW){
+                    s->mv_dir = MV_DIR_FORWARD;
+
+                    mx = h263_decode_motion(s, s->last_mv[0][0][0], s->f_code);
+                    my = h263_decode_motion(s, s->last_mv[0][0][1], s->f_code);
+                    s->last_mv[0][1][0]= s->last_mv[0][0][0]= s->mv[0][0][0] = mx;
+                    s->last_mv[0][1][1]= s->last_mv[0][0][1]= s->mv[0][0][1] = my;
+                }
+    
+                if(mb_type!=MB_TYPE_B_FORW){
+                    s->mv_dir |= MV_DIR_BACKWARD;
+
+                    mx = h263_decode_motion(s, s->last_mv[1][0][0], s->b_code);
+                    my = h263_decode_motion(s, s->last_mv[1][0][1], s->b_code);
+                    s->last_mv[1][1][0]= s->last_mv[1][0][0]= s->mv[1][0][0] = mx;
+                    s->last_mv[1][1][1]= s->last_mv[1][0][1]= s->mv[1][0][1] = my;
+                }
+                if(mb_type!=MB_TYPE_B_DIRECT)
+                    PRINT_MB_TYPE(mb_type==MB_TYPE_B_FORW ? "F" : (mb_type==MB_TYPE_B_BACKW ? "B" : "T"));
+            }else if(mb_type!=MB_TYPE_B_DIRECT){
+                s->mv_type= MV_TYPE_FIELD;
+
+                if(mb_type!=MB_TYPE_B_BACKW){
+                    s->mv_dir = MV_DIR_FORWARD;
+                
+                    for(i=0; i<2; i++){
+                        mx = h263_decode_motion(s, s->last_mv[0][i][0]  , s->f_code);
+                        my = h263_decode_motion(s, s->last_mv[0][i][1]/2, s->f_code);
+                        s->last_mv[0][i][0]=  s->mv[0][i][0] = mx;
+                        s->last_mv[0][i][1]= (s->mv[0][i][1] = my)*2;
+                    }
+                }
+    
+                if(mb_type!=MB_TYPE_B_FORW){
+                    s->mv_dir |= MV_DIR_BACKWARD;
+
+                    for(i=0; i<2; i++){
+                        mx = h263_decode_motion(s, s->last_mv[1][i][0]  , s->b_code);
+                        my = h263_decode_motion(s, s->last_mv[1][i][1]/2, s->b_code);
+                        s->last_mv[1][i][0]=  s->mv[1][i][0] = mx;
+                        s->last_mv[1][i][1]= (s->mv[1][i][1] = my)*2;
+                    }
+                }
+                if(mb_type!=MB_TYPE_B_DIRECT)
+                    PRINT_MB_TYPE(mb_type==MB_TYPE_B_FORW ? "f" : (mb_type==MB_TYPE_B_BACKW ? "b" : "t"));
+            }
         }
-        s->mv_type = MV_TYPE_16X16; // we'll switch to 8x8 only if the last P frame had 8x8 for this MB and mb_type=0 here
-        mx=my=0; //for case 4, we could put this to the mb_type=4 but than gcc compains about uninitalized mx/my
-        switch(mb_type)
-        {
-        case 0: 
-            mx = h263_decode_motion(s, 0, 1);
-            my = h263_decode_motion(s, 0, 1);
-        case 4: 
+          
+        if(mb_type==4 || mb_type==MB_TYPE_B_DIRECT){
+            int mb_index= s->mb_x + s->mb_y*s->mb_width;
+            int i;
+            
+            if(mb_type==4)
+                mx=my=0;
+            else{
+                mx = h263_decode_motion(s, 0, 1);
+                my = h263_decode_motion(s, 0, 1);
+            }
             s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
             xy= s->block_index[0];
-            time_pp= s->last_non_b_time[0] - s->last_non_b_time[1];
-            time_pb= s->time - s->last_non_b_time[1];
-//if(time_pp>3000 )printf("%d %d  ", time_pp, time_pb);
-            //FIXME 4MV
+            time_pp= s->pp_time;
+            time_pb= s->pb_time;
+            
             //FIXME avoid divides
-            s->mv[0][0][0] = s->motion_val[xy][0]*time_pb/time_pp + mx;
-            s->mv[0][0][1] = s->motion_val[xy][1]*time_pb/time_pp + my;
-            s->mv[1][0][0] = mx ? s->mv[0][0][0] - s->motion_val[xy][0]
-                                : s->motion_val[xy][0]*(time_pb - time_pp)/time_pp + mx;
-            s->mv[1][0][1] = my ? s->mv[0][0][1] - s->motion_val[xy][1] 
-                                : s->motion_val[xy][1]*(time_pb - time_pp)/time_pp + my;
-/*            s->mv[0][0][0] = 
-            s->mv[0][0][1] = 
-            s->mv[1][0][0] = 
-            s->mv[1][0][1] = 1000;*/
-            break;
-        case 1: 
-            s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
-            mx = h263_decode_motion(s, s->last_mv[0][0][0], s->f_code);
-            my = h263_decode_motion(s, s->last_mv[0][0][1], s->f_code);
-            s->last_mv[0][0][0]= s->mv[0][0][0] = mx;
-            s->last_mv[0][0][1]= s->mv[0][0][1] = my;
-
-            mx = h263_decode_motion(s, s->last_mv[1][0][0], s->b_code);
-            my = h263_decode_motion(s, s->last_mv[1][0][1], s->b_code);
-            s->last_mv[1][0][0]= s->mv[1][0][0] = mx;
-            s->last_mv[1][0][1]= s->mv[1][0][1] = my;
-            break;
-        case 2: 
-            s->mv_dir = MV_DIR_BACKWARD;
-            mx = h263_decode_motion(s, s->last_mv[1][0][0], s->b_code);
-            my = h263_decode_motion(s, s->last_mv[1][0][1], s->b_code);
-            s->last_mv[1][0][0]= s->mv[1][0][0] = mx;
-            s->last_mv[1][0][1]= s->mv[1][0][1] = my;
-            break;
-        case 3:
-            s->mv_dir = MV_DIR_FORWARD;
-            mx = h263_decode_motion(s, s->last_mv[0][0][0], s->f_code);
-            my = h263_decode_motion(s, s->last_mv[0][0][1], s->f_code);
-            s->last_mv[0][0][0]= s->mv[0][0][0] = mx;
-            s->last_mv[0][0][1]= s->mv[0][0][1] = my;
-            break;
-        default: return -1;
+            switch(s->co_located_type_table[mb_index]){
+            case 0:
+                s->mv_type= MV_TYPE_16X16;
+                s->mv[0][0][0] = s->motion_val[xy][0]*time_pb/time_pp + mx;
+                s->mv[0][0][1] = s->motion_val[xy][1]*time_pb/time_pp + my;
+                s->mv[1][0][0] = mx ? s->mv[0][0][0] - s->motion_val[xy][0]
+                                    : s->motion_val[xy][0]*(time_pb - time_pp)/time_pp;
+                s->mv[1][0][1] = my ? s->mv[0][0][1] - s->motion_val[xy][1] 
+                                    : s->motion_val[xy][1]*(time_pb - time_pp)/time_pp;
+                PRINT_MB_TYPE(mb_type==4 ? "D" : "S");
+                break;
+            case CO_LOCATED_TYPE_4MV:
+                s->mv_type = MV_TYPE_8X8;
+                for(i=0; i<4; i++){
+                    xy= s->block_index[i];
+                    s->mv[0][i][0] = s->motion_val[xy][0]*time_pb/time_pp + mx;
+                    s->mv[0][i][1] = s->motion_val[xy][1]*time_pb/time_pp + my;
+                    s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->motion_val[xy][0]
+                                        : s->motion_val[xy][0]*(time_pb - time_pp)/time_pp;
+                    s->mv[1][i][1] = my ? s->mv[0][i][1] - s->motion_val[xy][1] 
+                                        : s->motion_val[xy][1]*(time_pb - time_pp)/time_pp;
+                }
+                PRINT_MB_TYPE("4");
+                break;
+            case CO_LOCATED_TYPE_FIELDMV:
+                s->mv_type = MV_TYPE_FIELD;
+                for(i=0; i<2; i++){
+                    if(s->top_field_first){
+                        time_pp= s->pp_field_time - s->field_select_table[mb_index][i] + i;
+                        time_pb= s->pb_field_time - s->field_select_table[mb_index][i] + i;
+                    }else{
+                        time_pp= s->pp_field_time + s->field_select_table[mb_index][i] - i;
+                        time_pb= s->pb_field_time + s->field_select_table[mb_index][i] - i;
+                    }
+                    s->mv[0][i][0] = s->field_mv_table[mb_index][i][0]*time_pb/time_pp + mx;
+                    s->mv[0][i][1] = s->field_mv_table[mb_index][i][1]*time_pb/time_pp + my;
+                    s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->field_mv_table[mb_index][i][0]
+                                        : s->field_mv_table[mb_index][i][0]*(time_pb - time_pp)/time_pp;
+                    s->mv[1][i][1] = my ? s->mv[0][i][1] - s->field_mv_table[mb_index][i][1] 
+                                        : s->field_mv_table[mb_index][i][1]*(time_pb - time_pp)/time_pp;
+                }
+                PRINT_MB_TYPE("=");
+                break;
+            }
+        }
+        
+        if(mb_type<0 || mb_type>4){
+            printf("illegal MB_type\n");
+            return -1;
         }
     } else { /* I-Frame */
-        cbpc = get_vlc(&s->gb, &intra_MCBPC_vlc);
+        cbpc = get_vlc2(&s->gb, intra_MCBPC_vlc.table, INTRA_MCBPC_VLC_BITS, 1);
         if (cbpc < 0)
             return -1;
         dquant = cbpc & 4;
@@ -1450,41 +3139,52 @@ intra:
             if (s->ac_pred && s->h263_aic)
                 s->h263_aic_dir = get_bits1(&s->gb);
         }
-        if (s->h263_aic) {
-            s->y_dc_scale = 2 * s->qscale;
-            s->c_dc_scale = 2 * s->qscale;
-        }
-        cbpy = get_vlc(&s->gb, &cbpy_vlc);
+        PRINT_MB_TYPE(s->ac_pred ? "A" : "I");
+        
+        cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
+        if(cbpy<0) return -1;
         cbp = (cbpc & 3) | (cbpy << 2);
         if (dquant) {
-            s->qscale += quant_tab[get_bits(&s->gb, 2)];
-            if (s->qscale < 1)
-                s->qscale = 1;
-            else if (s->qscale > 31)
-                s->qscale = 31;
+            change_qscale(s, quant_tab[get_bits(&s->gb, 2)]);
+        }
+        if(!s->progressive_sequence)
+            s->interlaced_dct= get_bits1(&s->gb);
+
+        /* decode each block */
+        if (s->h263_pred) {
+            for (i = 0; i < 6; i++) {
+                if (mpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, 1) < 0)
+                    return -1;
+            }
+        } else {
+            for (i = 0; i < 6; i++) {
+                if (h263_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0)
+                    return -1;
+            }
         }
+        return 0;
     }
 
     /* decode each block */
     if (s->h263_pred) {
-       for (i = 0; i < 6; i++) {
-           if (mpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0)
+        for (i = 0; i < 6; i++) {
+            if (mpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, 0) < 0)
                 return -1;
-       }
+        }
     } else {
-       for (i = 0; i < 6; i++) {
-           if (h263_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0)
+        for (i = 0; i < 6; i++) {
+            if (h263_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0)
                 return -1;
-       }
+        }
     }
     return 0;
 }
 
 static int h263_decode_motion(MpegEncContext * s, int pred, int f_code)
 {
-    int code, val, sign, shift, l, m;
+    int code, val, sign, shift, l;
 
-    code = get_vlc(&s->gb, &mv_vlc);
+    code = get_vlc2(&s->gb, mv_vlc.table, MV_VLC_BITS, 2);
     if (code < 0)
         return 0xffff;
 
@@ -1499,15 +3199,14 @@ static int h263_decode_motion(MpegEncContext * s, int pred, int f_code)
     if (sign)
         val = -val;
     val += pred;
-    
+
     /* modulo decoding */
     if (!s->h263_long_vectors) {
         l = (1 << (f_code - 1)) * 32;
-        m = 2 * l;
         if (val < -l) {
-            val += m;
+            val += l<<1;
         } else if (val >= l) {
-            val -= m;
+            val -= l<<1;
         }
     } else {
         /* horrible h263 long vector mode */
@@ -1597,7 +3296,7 @@ static int h263_decode_block(MpegEncContext * s, DCTELEM * block,
     }
 
     for(;;) {
-        code = get_vlc(&s->gb, &rl->vlc);
+        code = get_vlc2(&s->gb, rl->vlc.table, TEX_VLC_BITS, 2);
         if (code < 0)
             return -1;
         if (code == rl->n) {
@@ -1608,7 +3307,7 @@ static int h263_decode_block(MpegEncContext * s, DCTELEM * block,
             if (s->h263_rv10 && level == -128) {
                 /* XXX: should patch encoder too */
                 level = get_bits(&s->gb, 12);
-                level = (level << 20) >> 20;
+               level= (level + ((-1)<<11)) ^ ((-1)<<11); //sign extension
             }
         } else {
             run = rl->table_run[code];
@@ -1635,28 +3334,34 @@ not_coded:
     return 0;
 }
 
-static int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr)
+static inline int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr)
 {
     int level, pred, code;
     UINT16 *dc_val;
 
     if (n < 4) 
-        code = get_vlc(&s->gb, &dc_lum);
+        code = get_vlc2(&s->gb, dc_lum.table, DC_VLC_BITS, 1);
     else 
-        code = get_vlc(&s->gb, &dc_chrom);
-    if (code < 0)
+        code = get_vlc2(&s->gb, dc_chrom.table, DC_VLC_BITS, 1);
+    if (code < 0 || code > 9 /* && s->nbit<9 */){
+        fprintf(stderr, "illegal dc vlc\n");
         return -1;
+    }
     if (code == 0) {
         level = 0;
     } else {
         level = get_bits(&s->gb, code);
         if ((level >> (code - 1)) == 0) /* if MSB not set it is negative*/
             level = - (level ^ ((1 << code) - 1));
-        if (code > 8)
-            skip_bits1(&s->gb); /* marker */
+        if (code > 8){
+            if(get_bits1(&s->gb)==0){ /* marker */
+                fprintf(stderr, "dc marker bit missing\n");
+                return -1;
+            }
+        }
     }
 
-    pred = mpeg4_pred_dc(s, n, &dc_val, dir_ptr);
+    pred = ff_mpeg4_pred_dc(s, n, &dc_val, dir_ptr);
     level += pred;
     if (level < 0)
         level = 0;
@@ -1668,25 +3373,43 @@ static int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr)
     return level;
 }
 
-static int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
-                              int n, int coded)
+/**
+ * decode a block
+ * returns 0 if everything went ok
+ * returns DECODING_AC_LOST   if an error was detected during AC decoding
+ * returns DECODING_ACDC_LOST if an error was detected during DC decoding
+ */
+static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
+                              int n, int coded, int intra)
 {
-    int code, level, i, j, last, run;
+    int level, i, last, run;
     int dc_pred_dir;
-    RLTable *rl;
-    const UINT8 *scan_table;
+    RLTable * rl;
+    RL_VLC_ELEM * rl_vlc;
+    const UINT8 * scan_table;
+    int qmul, qadd;
 
-    if (s->mb_intra) {
+    if(intra) {
        /* DC coef */
-        level = mpeg4_decode_dc(s, n, &dc_pred_dir);
-        if (level < 0)
-            return -1;
+        if(s->data_partitioning && s->pict_type!=B_TYPE){
+            level = s->dc_val[0][ s->block_index[n] ];
+            if(n<4) level= (level + (s->y_dc_scale>>1))/s->y_dc_scale; //FIXME optimizs
+            else    level= (level + (s->c_dc_scale>>1))/s->c_dc_scale;
+            dc_pred_dir= (s->pred_dir_table[s->mb_x + s->mb_y*s->mb_width]<<n)&32;
+        }else{
+            level = mpeg4_decode_dc(s, n, &dc_pred_dir);
+            if (level < 0)
+                return DECODING_ACDC_LOST;
+        }
         block[0] = level;
-       i = 1;
+        i = 0;
         if (!coded) 
             goto not_coded;
         rl = &rl_intra;
-        if (s->ac_pred) {
+        rl_vlc = rl_intra.rl_vlc[0];
+        if(s->alternate_scan)
+            scan_table = ff_alternate_vertical_scan; /* left */
+        else if (s->ac_pred) {
             if (dc_pred_dir == 0) 
                 scan_table = ff_alternate_vertical_scan; /* left */
             else
@@ -1694,79 +3417,147 @@ static int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
         } else {
             scan_table = zigzag_direct;
         }
+        qmul=1;
+        qadd=0;
     } else {
-       i = 0;
+        i = -1;
         if (!coded) {
-            s->block_last_index[n] = i - 1;
+            s->block_last_index[n] = i;
             return 0;
         }
         rl = &rl_inter;
-        scan_table = zigzag_direct;
-    }
+   
+        if(s->alternate_scan)
+            scan_table = ff_alternate_vertical_scan; /* left */
+        else
+            scan_table = zigzag_direct;
 
+        if(s->mpeg_quant){
+            qmul=1;
+            qadd=0;
+            rl_vlc = rl_inter.rl_vlc[0];        
+        }else{
+            qmul = s->qscale << 1;
+            qadd = (s->qscale - 1) | 1;
+            rl_vlc = rl_inter.rl_vlc[s->qscale];
+        }
+    }
+  {
+    OPEN_READER(re, &s->gb);
     for(;;) {
-        code = get_vlc(&s->gb, &rl->vlc);
-        if (code < 0)
-            return -1;
-        if (code == rl->n) {
+        UPDATE_CACHE(re, &s->gb);
+        GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2);
+        if (level==0) {
+            int cache;
+            cache= GET_CACHE(re, &s->gb);
             /* escape */
-            if (get_bits1(&s->gb) != 0) {
-                if (get_bits1(&s->gb) != 0) {
+            if (cache&0x80000000) {
+                if (cache&0x40000000) {
                     /* third escape */
-                    last = get_bits1(&s->gb);
-                    run = get_bits(&s->gb, 6);
-                    get_bits1(&s->gb); /* marker */
-                    level = get_bits(&s->gb, 12);
-                    level = (level << 20) >> 20; /* sign extend */
-                    skip_bits1(&s->gb); /* marker */
+                    SKIP_CACHE(re, &s->gb, 2);
+                    last=  SHOW_UBITS(re, &s->gb, 1); SKIP_CACHE(re, &s->gb, 1);
+                    run=   SHOW_UBITS(re, &s->gb, 6); LAST_SKIP_CACHE(re, &s->gb, 6);
+                    SKIP_COUNTER(re, &s->gb, 2+1+6);
+                    UPDATE_CACHE(re, &s->gb);
+
+                    if(SHOW_UBITS(re, &s->gb, 1)==0){
+                        fprintf(stderr, "1. marker bit missing in 3. esc\n");
+                        return DECODING_AC_LOST;
+                    }; SKIP_CACHE(re, &s->gb, 1);
+                    
+                    level= SHOW_SBITS(re, &s->gb, 12); SKIP_CACHE(re, &s->gb, 12);
+                    if(SHOW_UBITS(re, &s->gb, 1)==0){
+                        fprintf(stderr, "2. marker bit missing in 3. esc\n");
+                        return DECODING_AC_LOST;
+                    }; LAST_SKIP_CACHE(re, &s->gb, 1);
+                    
+                    SKIP_COUNTER(re, &s->gb, 1+12+1);
+                    
+                    if(level*s->qscale>1024 || level*s->qscale<-1024){
+                        fprintf(stderr, "|level| overflow in 3. esc, qp=%d\n", s->qscale);
+                        return DECODING_AC_LOST;
+                    }
+#if 1 
+                    {
+                        const int abs_level= ABS(level);
+                        if(abs_level<=MAX_LEVEL && run<=MAX_RUN && s->error_resilience>=0){
+                            const int run1= run - rl->max_run[last][abs_level] - 1;
+                            if(abs_level <= rl->max_level[last][run]){
+                                fprintf(stderr, "illegal 3. esc, vlc encoding possible\n");
+                                return DECODING_AC_LOST;
+                            }
+                            if(abs_level <= rl->max_level[last][run]*2){
+                                fprintf(stderr, "illegal 3. esc, esc 1 encoding possible\n");
+                                return DECODING_AC_LOST;
+                            }
+                            if(run1 >= 0 && abs_level <= rl->max_level[last][run1]){
+                                fprintf(stderr, "illegal 3. esc, esc 2 encoding possible\n");
+                                return DECODING_AC_LOST;
+                            }
+                        }
+                    }
+#endif
+                   if (level>0) level= level * qmul + qadd;
+                    else         level= level * qmul - qadd;
+
+                    i+= run + 1;
+                    if(last) i+=192;
                 } else {
                     /* second escape */
-                    code = get_vlc(&s->gb, &rl->vlc);
-                    if (code < 0 || code >= rl->n)
-                        return -1;
-                    run = rl->table_run[code];
-                    level = rl->table_level[code];
-                    last = code >= rl->last;
-                    run += rl->max_run[last][level] + 1;
-                    if (get_bits1(&s->gb))
-                        level = -level;
+#if MIN_CACHE_BITS < 20
+                    LAST_SKIP_BITS(re, &s->gb, 2);
+                    UPDATE_CACHE(re, &s->gb);
+#else
+                    SKIP_BITS(re, &s->gb, 2);
+#endif
+                    GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2);
+                    i+= run + rl->max_run[run>>7][level/qmul] +1; //FIXME opt indexing
+                    level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
+                    LAST_SKIP_BITS(re, &s->gb, 1);
                 }
             } else {
                 /* first escape */
-                code = get_vlc(&s->gb, &rl->vlc);
-                if (code < 0 || code >= rl->n)
-                    return -1;
-                run = rl->table_run[code];
-                level = rl->table_level[code];
-                last = code >= rl->last;
-                level += rl->max_level[last][run];
-                if (get_bits1(&s->gb))
-                    level = -level;
+#if MIN_CACHE_BITS < 19
+                LAST_SKIP_BITS(re, &s->gb, 1);
+                UPDATE_CACHE(re, &s->gb);
+#else
+                SKIP_BITS(re, &s->gb, 1);
+#endif
+                GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2);
+                i+= run;
+                level = level + rl->max_level[run>>7][(run-1)&63] * qmul;//FIXME opt indexing
+                level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
+                LAST_SKIP_BITS(re, &s->gb, 1);
             }
         } else {
-            run = rl->table_run[code];
-            level = rl->table_level[code];
-            last = code >= rl->last;
-            if (get_bits1(&s->gb))
-                level = -level;
+            i+= run;
+            level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
+            LAST_SKIP_BITS(re, &s->gb, 1);
         }
-        i += run;
-        if (i >= 64)
-            return -1;
-       j = scan_table[i];
-        block[j] = level;
-        i++;
-        if (last)
+        if (i > 62){
+            i-= 192;
+            if(i&(~63)){
+                fprintf(stderr, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
+                return DECODING_AC_LOST;
+            }
+
+            block[scan_table[i]] = level;
             break;
+        }
+
+        block[scan_table[i]] = level;
     }
+    CLOSE_READER(re, &s->gb);
+  }
  not_coded:
     if (s->mb_intra) {
         mpeg4_pred_ac(s, block, n, dc_pred_dir);
         if (s->ac_pred) {
-            i = 64; /* XXX: not optimal */
+            i = 63; /* XXX: not optimal */
         }
     }
-    s->block_last_index[n] = i - 1;
+    s->block_last_index[n] = i;
     return 0;
 }
 
@@ -1775,15 +3566,24 @@ int h263_decode_picture_header(MpegEncContext *s)
 {
     int format, width, height;
 
-    /* picture header */
-    if (get_bits(&s->gb, 22) != 0x20)
+    /* picture start code */
+    if (get_bits(&s->gb, 22) != 0x20) {
+        fprintf(stderr, "Bad picture start code\n");
         return -1;
+    }
+    /* temporal reference */
     s->picture_number = get_bits(&s->gb, 8); /* picture timestamp */
-    
-    if (get_bits1(&s->gb) != 1)
-        return -1;     /* marker */
-    if (get_bits1(&s->gb) != 0)
+
+    /* PTYPE starts here */    
+    if (get_bits1(&s->gb) != 1) {
+        /* marker */
+        fprintf(stderr, "Bad marker\n");
+        return -1;
+    }
+    if (get_bits1(&s->gb) != 0) {
+        fprintf(stderr, "Bad H263 id\n");
         return -1;     /* h263 id */
+    }
     skip_bits1(&s->gb);        /* split screen off */
     skip_bits1(&s->gb);        /* camera  off */
     skip_bits1(&s->gb);        /* freeze picture release off */
@@ -1792,6 +3592,12 @@ int h263_decode_picture_header(MpegEncContext *s)
     s->gob_number = 0;
         
     format = get_bits(&s->gb, 3);
+    /* source format values (decimal):
+        0    forbidden
+        1    sub-QCIF
+        2    QCIF
+        7    extended PTYPE (PLUSPTYPE)
+    */
 
     if (format != 7 && format != 6) {
         s->h263_plus = 0;
@@ -1808,15 +3614,18 @@ int h263_decode_picture_header(MpegEncContext *s)
         s->unrestricted_mv = get_bits1(&s->gb); 
         s->h263_long_vectors = s->unrestricted_mv;
 
-        if (get_bits1(&s->gb) != 0)
+        if (get_bits1(&s->gb) != 0) {
+            fprintf(stderr, "H263 SAC not supported\n");
             return -1; /* SAC: off */
+        }
         if (get_bits1(&s->gb) != 0) {
             s->mv_type = MV_TYPE_8X8; /* Advanced prediction mode */
         }   
         
-        if (get_bits1(&s->gb) != 0)
+        if (get_bits1(&s->gb) != 0) {
+            fprintf(stderr, "H263 PB frame not supported\n");
             return -1; /* not PB frame */
-
+        }
         s->qscale = get_bits(&s->gb, 5);
         skip_bits1(&s->gb);    /* Continuous Presence Multipoint mode: off */
     } else {
@@ -1825,10 +3634,12 @@ int h263_decode_picture_header(MpegEncContext *s)
         /* H.263v2 */
         s->h263_plus = 1;
         ufep = get_bits(&s->gb, 3); /* Update Full Extended PTYPE */
-        
+
+        /* ufep values other than 0 and 1 are reserved */
         if (ufep == 1) {
             /* OPPTYPE */       
             format = get_bits(&s->gb, 3);
+            dprintf("ufep=1, format: %d\n", format);
             skip_bits(&s->gb,1); /* Custom PCF */
             s->umvplus_dec = get_bits(&s->gb, 1); /* Unrestricted Motion Vector */
             skip_bits1(&s->gb); /* Syntax-based Arithmetic Coding (SAC) */
@@ -1838,34 +3649,59 @@ int h263_decode_picture_header(MpegEncContext *s)
             if (get_bits1(&s->gb) != 0) { /* Advanced Intra Coding (AIC) */
                 s->h263_aic = 1;
             }
+           
             skip_bits(&s->gb, 7);
+            /* these are the 7 bits (in order of appearance): */
+            /* Deblocking Filter */
+            /* Slice Structured */
+            /* Reference Picture Selection */
+            /* Independent Segment Decoding */
+            /* Alternative Inter VLC */
+            /* Modified Quantization */
+            /* Prevent start code emulation */
+
             skip_bits(&s->gb, 3); /* Reserved */
-        } else if (ufep != 0)
+        } else if (ufep != 0) {
+            fprintf(stderr, "Bad UFEP type (%d)\n", ufep);
             return -1;
+        }
             
         /* MPPTYPE */
-        s->pict_type = get_bits(&s->gb, 3) + 1;
+        s->pict_type = get_bits(&s->gb, 3) + I_TYPE;
+        dprintf("pict_type: %d\n", s->pict_type);
         if (s->pict_type != I_TYPE &&
             s->pict_type != P_TYPE)
             return -1;
         skip_bits(&s->gb, 2);
         s->no_rounding = get_bits1(&s->gb);
-        //fprintf(stderr, "\nRTYPE: %d", s->no_rounding);
+        dprintf("RTYPE: %d\n", s->no_rounding);
         skip_bits(&s->gb, 4);
         
         /* Get the picture dimensions */
         if (ufep) {
             if (format == 6) {
                 /* Custom Picture Format (CPFMT) */
-                skip_bits(&s->gb, 4); /* aspect ratio */
+                s->aspect_ratio_info = get_bits(&s->gb, 4);
+                dprintf("aspect: %d\n", s->aspect_ratio_info);
+                /* aspect ratios:
+                0 - forbidden
+                1 - 1:1
+                2 - 12:11 (CIF 4:3)
+                3 - 10:11 (525-type 4:3)
+                4 - 16:11 (CIF 16:9)
+                5 - 40:33 (525-type 16:9)
+                6-14 - reserved
+                */
                 width = (get_bits(&s->gb, 9) + 1) * 4;
                 skip_bits1(&s->gb);
                 height = get_bits(&s->gb, 9) * 4;
-#ifdef DEBUG 
-                fprintf(stderr,"\nH.263+ Custom picture: %dx%d\n",width,height);
-#endif            
-            }
-            else {
+                dprintf("\nH.263+ Custom picture: %dx%d\n",width,height);
+                if (s->aspect_ratio_info == FF_ASPECT_EXTENDED) {
+                    /* extended pixel aspect ratio: explicit width and height */
+                   s->aspected_width = get_bits(&s->gb, 8);
+                   s->aspected_height = get_bits(&s->gb, 8);
+                }
+            } else {
                 width = h263_format[format][0];
                 height = h263_format[format][1];
             }
@@ -1885,6 +3721,15 @@ int h263_decode_picture_header(MpegEncContext *s)
         skip_bits(&s->gb, 8);
     }
     s->f_code = 1;
+    
+    if(s->h263_aic){
+         s->y_dc_scale_table= 
+         s->c_dc_scale_table= h263_aic_dc_scale_table;
+    }else{
+        s->y_dc_scale_table=
+        s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
+    }
+
     return 0;
 }
 
@@ -1960,13 +3805,13 @@ static void mpeg4_decode_sprite_trajectory(MpegEncContext * s)
 // the idea behind this virtual_ref mess is to be able to use shifts later per pixel instead of divides
 // so the distance between points is converted from w&h based to w2&h2 based which are of the 2^x form
     virtual_ref[0][0]= 16*(vop_ref[0][0] + w2) 
-        + RDIV(((w - w2)*(r*sprite_ref[0][0] - 16*vop_ref[0][0]) + w2*(r*sprite_ref[1][0] - 16*vop_ref[1][0])),w);
+        + ROUNDED_DIV(((w - w2)*(r*sprite_ref[0][0] - 16*vop_ref[0][0]) + w2*(r*sprite_ref[1][0] - 16*vop_ref[1][0])),w);
     virtual_ref[0][1]= 16*vop_ref[0][1] 
-        + RDIV(((w - w2)*(r*sprite_ref[0][1] - 16*vop_ref[0][1]) + w2*(r*sprite_ref[1][1] - 16*vop_ref[1][1])),w);
+        + ROUNDED_DIV(((w - w2)*(r*sprite_ref[0][1] - 16*vop_ref[0][1]) + w2*(r*sprite_ref[1][1] - 16*vop_ref[1][1])),w);
     virtual_ref[1][0]= 16*vop_ref[0][0] 
-        + RDIV(((h - h2)*(r*sprite_ref[0][0] - 16*vop_ref[0][0]) + h2*(r*sprite_ref[2][0] - 16*vop_ref[2][0])),h);
+        + ROUNDED_DIV(((h - h2)*(r*sprite_ref[0][0] - 16*vop_ref[0][0]) + h2*(r*sprite_ref[2][0] - 16*vop_ref[2][0])),h);
     virtual_ref[1][1]= 16*(vop_ref[0][1] + h2) 
-        + RDIV(((h - h2)*(r*sprite_ref[0][1] - 16*vop_ref[0][1]) + h2*(r*sprite_ref[2][1] - 16*vop_ref[2][1])),h);
+        + ROUNDED_DIV(((h - h2)*(r*sprite_ref[0][1] - 16*vop_ref[0][1]) + h2*(r*sprite_ref[2][1] - 16*vop_ref[2][1])),h);
 
     switch(s->num_sprite_warping_points)
     {
@@ -2075,12 +3920,14 @@ printf("%d %d\n", s->sprite_delta[1][1][1], a<<s->sprite_shift[1][1]);*/
     else
         s->real_sprite_warping_points= s->num_sprite_warping_points;
 
+//printf("%d %d %d %d\n", d[0][0], d[0][1], s->sprite_offset[0][0], s->sprite_offset[0][1]);
 }
 
 /* decode mpeg4 VOP header */
 int mpeg4_decode_picture_header(MpegEncContext * s)
 {
     int time_incr, startcode, state, v;
+    int time_increment;
 
  redo:
     /* search next start code */
@@ -2094,10 +3941,14 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
             break;
         }
         state = ((state << 8) | v) & 0xffffff;
-        /* XXX: really detect end of frame */
-        if (state == 0){
-            printf("illegal zero code found\n");
-            return -1;
+        if( get_bits_count(&s->gb) > s->gb.size*8-32){
+            if(s->gb.size>50){
+                printf("no VOP startcode found, frame size was=%d\n", s->gb.size);
+                return -1;
+            }else{
+                printf("frame skip\n");
+                return FRAME_SKIPED;
+            }
         }
     }
 //printf("startcode %X %d\n", startcode, get_bits_count(&s->gb));
@@ -2106,23 +3957,45 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
 
         /* vol header */
         skip_bits(&s->gb, 1); /* random access */
-        skip_bits(&s->gb, 8); /* vo_type */
+        s->vo_type= get_bits(&s->gb, 8);
         if (get_bits1(&s->gb) != 0) { /* is_ol_id */
             vo_ver_id = get_bits(&s->gb, 4); /* vo_ver_id */
             skip_bits(&s->gb, 3); /* vo_priority */
         } else {
             vo_ver_id = 1;
         }
-        
+//printf("vo type:%d\n",s->vo_type);
         s->aspect_ratio_info= get_bits(&s->gb, 4);
-       if(s->aspect_ratio_info == EXTENDET_PAR){
-            skip_bits(&s->gb, 8); //par_width
-            skip_bits(&s->gb, 8); // par_height
+       if(s->aspect_ratio_info == FF_ASPECT_EXTENDED){     
+           s->aspected_width = get_bits(&s->gb, 8); // par_width
+           s->aspected_height = get_bits(&s->gb, 8); // par_height
         }
-        if(get_bits1(&s->gb)){ /* vol control parameter */
-            printf("vol control parameter not supported\n");
-            return -1;   
+
+        if ((s->vol_control_parameters=get_bits1(&s->gb))) { /* vol control parameter */
+            int chroma_format= get_bits(&s->gb, 2);
+            if(chroma_format!=1){
+                printf("illegal chroma format\n");
+            }
+            s->low_delay= get_bits1(&s->gb);
+            if(get_bits1(&s->gb)){ /* vbv parameters */
+                get_bits(&s->gb, 15);  /* first_half_bitrate */
+                skip_bits1(&s->gb);    /* marker */
+                get_bits(&s->gb, 15);  /* latter_half_bitrate */
+                skip_bits1(&s->gb);    /* marker */
+                get_bits(&s->gb, 15);  /* first_half_vbv_buffer_size */
+                skip_bits1(&s->gb);    /* marker */
+                get_bits(&s->gb, 3);   /* latter_half_vbv_buffer_size */
+                get_bits(&s->gb, 11);  /* first_half_vbv_occupancy */
+                skip_bits1(&s->gb);    /* marker */
+                get_bits(&s->gb, 15);  /* latter_half_vbv_occupancy */
+                skip_bits1(&s->gb);    /* marker */               
+            }
+        }else{
+            // clear the low_delay flag only on the first picture so the (smart?) low_delay detection won't be overridden
+            if(s->picture_number==0)
+                s->low_delay=0;
         }
+
         s->shape = get_bits(&s->gb, 2); /* vol shape */
         if(s->shape != RECT_SHAPE) printf("only rectangular vol supported\n");
         if(s->shape == GRAY_SHAPE && vo_ver_id != 1){
@@ -2133,6 +4006,7 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
         skip_bits1(&s->gb);   /* marker */
         
         s->time_increment_resolution = get_bits(&s->gb, 16);
+        
         s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1;
         if (s->time_increment_bits < 1)
             s->time_increment_bits = 1;
@@ -2149,10 +4023,15 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
                 skip_bits1(&s->gb);   /* marker */
                 height = get_bits(&s->gb, 13);
                 skip_bits1(&s->gb);   /* marker */
+                if(width && height){ /* they should be non zero but who knows ... */
+                    s->width = width;
+                    s->height = height;
+//                    printf("width/height: %d %d\n", width, height);
+                }
             }
             
-            if(get_bits1(&s->gb)) printf("interlaced not supported\n");   /* interlaced */
-            if(!get_bits1(&s->gb)) printf("OBMC not supported\n");   /* OBMC Disable */
+            s->progressive_sequence= get_bits1(&s->gb)^1;
+            if(!get_bits1(&s->gb)) printf("OBMC not supported (very likely buggy encoder)\n");   /* OBMC Disable */
             if (vo_ver_id == 1) {
                 s->vol_sprite_usage = get_bits1(&s->gb); /* vol_sprite_usage */
             } else {
@@ -2181,26 +4060,75 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
             if (get_bits1(&s->gb) == 1) {   /* not_8_bit */
                 s->quant_precision = get_bits(&s->gb, 4); /* quant_precision */
                 if(get_bits(&s->gb, 4)!=8) printf("N-bit not supported\n"); /* bits_per_pixel */
+                if(s->quant_precision!=5) printf("quant precission %d\n", s->quant_precision);
             } else {
                 s->quant_precision = 5;
             }
             
             // FIXME a bunch of grayscale shape things
-            if(get_bits1(&s->gb)) printf("Quant-Type not supported\n");  /* vol_quant_type */ //FIXME
+
+            if((s->mpeg_quant=get_bits1(&s->gb))){ /* vol_quant_type */
+                int i, j, v;
+                
+                /* load default matrices */
+                for(i=0; i<64; i++){
+                    v= ff_mpeg4_default_intra_matrix[i];
+                    s->intra_matrix[i]= v;
+                    s->chroma_intra_matrix[i]= v;
+                    
+                    v= ff_mpeg4_default_non_intra_matrix[i];
+                    s->inter_matrix[i]= v;
+                    s->chroma_inter_matrix[i]= v;
+                }
+
+                /* load custom intra matrix */
+                if(get_bits1(&s->gb)){
+                    for(i=0; i<64; i++){
+                        v= get_bits(&s->gb, 8);
+                        if(v==0) break;
+
+                        j= zigzag_direct[i];
+                        s->intra_matrix[j]= v;
+                        s->chroma_intra_matrix[j]= v;
+                    }
+                }
+
+                /* load custom non intra matrix */
+                if(get_bits1(&s->gb)){
+                    for(i=0; i<64; i++){
+                        v= get_bits(&s->gb, 8);
+                        if(v==0) break;
+
+                        j= zigzag_direct[i];
+                        s->inter_matrix[j]= v;
+                        s->chroma_inter_matrix[j]= v;
+                    }
+
+                    /* replicate last value -- NOTE(review): v is 0 here after the break above, so zeros are written; confirm intent */
+                    for(; i<64; i++){
+                        j= zigzag_direct[i];
+                        s->inter_matrix[j]= v;
+                        s->chroma_inter_matrix[j]= v;
+                    }
+                }
+
+                // FIXME a bunch of grayscale shape things
+            }
+
             if(vo_ver_id != 1)
                  s->quarter_sample= get_bits1(&s->gb);
             else s->quarter_sample=0;
 
             if(!get_bits1(&s->gb)) printf("Complexity estimation not supported\n");
-#if 0
-            if(get_bits1(&s->gb)) printf("resync disable\n");
-#else
-            skip_bits1(&s->gb);   /* resync_marker_disabled */
-#endif
-            s->data_partioning= get_bits1(&s->gb);
-            if(s->data_partioning){
-                printf("data partitioning not supported\n");
-                skip_bits1(&s->gb); // reversible vlc
+
+            s->resync_marker= !get_bits1(&s->gb); /* resync_marker_disabled */
+
+            s->data_partitioning= get_bits1(&s->gb);
+            if(s->data_partitioning){
+                s->rvlc= get_bits1(&s->gb);
+                if(s->rvlc){
+                    printf("reversible vlc not supported\n");
+                }
             }
             
             if(vo_ver_id != 1) {
@@ -2219,9 +4147,18 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
             }
 
             s->scalability= get_bits1(&s->gb);
+            if(s->workaround_bugs==1) s->scalability=0;
             if (s->scalability) {
-                printf("bad scalability!!!\n");
-                return -1;
+                int dummy= s->hierachy_type= get_bits1(&s->gb);
+                int ref_layer_id= get_bits(&s->gb, 4);
+                int ref_layer_sampling_dir= get_bits1(&s->gb);
+                int h_sampling_factor_n= get_bits(&s->gb, 5);
+                int h_sampling_factor_m= get_bits(&s->gb, 5);
+                int v_sampling_factor_n= get_bits(&s->gb, 5);
+                int v_sampling_factor_m= get_bits(&s->gb, 5);
+                s->enhancement_type= get_bits1(&s->gb);
+                // bin shape stuff FIXME
+                printf("scalability not supported\n");
             }
         }
 //printf("end Data %X %d\n", show_bits(&s->gb, 32), get_bits_count(&s->gb)&0x7);
@@ -2241,16 +4178,15 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
         }
         buf[255]=0;
         e=sscanf(buf, "DivX%dBuild%d", &ver, &build);
+        if(e!=2)
+            e=sscanf(buf, "DivX%db%d", &ver, &build);
         if(e==2){
             s->divx_version= ver;
             s->divx_build= build;
             if(s->picture_number==0){
                 printf("This file was encoded with DivX%d Build%d\n", ver, build);
-                if(ver==500 && build==413){ //most likely all version are indeed totally buggy but i dunno for sure ...
+                if(ver==500 && build==413){
                     printf("WARNING: this version of DivX is not MPEG4 compatible, trying to workaround these bugs...\n");
-                }else{
-                    printf("hmm, i havnt seen that version of divx yet, lets assume they fixed these bugs ...\n"
-                           "using mpeg4 decoder, if it fails contact the developers (of ffmpeg)\n");
                 }
             }
         }
@@ -2260,23 +4196,55 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
         goto redo;
     }
 
-    s->pict_type = get_bits(&s->gb, 2) + 1;    /* pict type: I = 0 , P = 1 */
-//printf("pic: %d\n", s->pict_type); 
+    s->pict_type = get_bits(&s->gb, 2) + I_TYPE;       /* pict type: I = 0 , P = 1 */
+//if(s->pict_type!=I_TYPE) return FRAME_SKIPED;
+    if(s->pict_type==B_TYPE && s->low_delay && s->vol_control_parameters==0){
+        printf("low_delay flag set, but shouldnt, clearing it\n");
+        s->low_delay=0;
+    }
+// printf("pic: %d, qpel:%d part:%d resync:%d\n", s->pict_type, s->quarter_sample, s->data_partitioning, s->resync_marker); 
+    
+    if(s->time_increment_resolution==0){
+        s->time_increment_resolution=1;
+//        fprintf(stderr, "time_increment_resolution is illegal\n");
+    }
     time_incr=0;
     while (get_bits1(&s->gb) != 0) 
         time_incr++;
 
     check_marker(&s->gb, "before time_increment");
-    s->time_increment= get_bits(&s->gb, s->time_increment_bits);
+    time_increment= get_bits(&s->gb, s->time_increment_bits);
+//printf(" type:%d modulo_time_base:%d increment:%d\n", s->pict_type, time_incr, time_increment);
     if(s->pict_type!=B_TYPE){
+        s->last_time_base= s->time_base;
         s->time_base+= time_incr;
-        s->last_non_b_time[1]= s->last_non_b_time[0];
-        s->last_non_b_time[0]= s->time_base*s->time_increment_resolution + s->time_increment;
+        s->time= s->time_base*s->time_increment_resolution + time_increment;
+        if(s->time < s->last_non_b_time && s->workaround_bugs==3){
+            fprintf(stderr, "header is not mpeg4 compatible, broken encoder, trying to workaround\n");
+            s->time_base++;
+            s->time+= s->time_increment_resolution;
+        }
+        s->pp_time= s->time - s->last_non_b_time;
+        s->last_non_b_time= s->time;
     }else{
-        s->time= (s->last_non_b_time[1]/s->time_increment_resolution + time_incr)*s->time_increment_resolution;
-        s->time+= s->time_increment;
+        s->time= (s->last_time_base + time_incr)*s->time_increment_resolution + time_increment;
+        s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
+        if(s->pp_time <=s->pb_time || s->pp_time <= s->pp_time - s->pb_time || s->pp_time<=0){
+//            printf("messed up order, seeking?, skiping current b frame\n");
+            return FRAME_SKIPED;
+        }
+        
+        if(s->t_frame==0) s->t_frame= s->time - s->last_time_base;
+        if(s->t_frame==0) s->t_frame=1; // 1/0 protection
+//printf("%Ld %Ld %d %d\n", s->last_non_b_time, s->time, s->pp_time, s->t_frame); fflush(stdout);
+        s->pp_field_time= (  ROUNDED_DIV(s->last_non_b_time, s->t_frame) 
+                           - ROUNDED_DIV(s->last_non_b_time - s->pp_time, s->t_frame))*2;
+        s->pb_field_time= (  ROUNDED_DIV(s->time, s->t_frame) 
+                           - ROUNDED_DIV(s->last_non_b_time - s->pp_time, s->t_frame))*2;
     }
-
+    
+    s->avctx->pts= s->time*1000LL*1000LL / s->time_increment_resolution;
+    
     if(check_marker(&s->gb, "before vop_coded")==0 && s->picture_number==0){
         printf("hmm, seems the headers arnt complete, trying to guess time_increment_bits\n");
         for(s->time_increment_bits++ ;s->time_increment_bits<16; s->time_increment_bits++){
@@ -2287,8 +4255,8 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
     /* vop coded */
     if (get_bits1(&s->gb) != 1)
         goto redo;
-//printf("time %d %d %d || %d %d %d\n", s->time_increment_bits, s->time_increment, s->time_base,
-//s->time, s->last_non_b_time[0], s->last_non_b_time[1]);  
+//printf("time %d %d %d || %Ld %Ld %Ld\n", s->time_increment_bits, s->time_increment_resolution, s->time_base,
+//s->time, s->last_non_b_time, s->last_non_b_time - s->pp_time);  
     if (s->shape != BIN_ONLY_SHAPE && ( s->pict_type == P_TYPE
                           || (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE))) {
         /* rounding type for motion estimation */
@@ -2319,8 +4287,14 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
 //FIXME complexity estimation stuff
      
      if (s->shape != BIN_ONLY_SHAPE) {
-         skip_bits(&s->gb, 3); /* intra dc VLC threshold */
-         //FIXME interlaced specific bits
+         int t;
+         t=get_bits(&s->gb, 3); /* intra dc VLC threshold */
+//printf("threshold %d\n", t);
+         if(!s->progressive_sequence){
+             s->top_field_first= get_bits1(&s->gb);
+             s->alternate_scan= get_bits1(&s->gb);
+         }else
+             s->alternate_scan= 0;
      }
 
      if(s->pict_type == S_TYPE && (s->vol_sprite_usage==STATIC_SPRITE || s->vol_sprite_usage==GMC_SPRITE)){
@@ -2347,20 +4321,50 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
                  printf("Error, header damaged or not MPEG4 header (f_code=0)\n");
                  return -1; // makes no sense to continue, as the MV decoding will break very quickly
              }
-         }
+         }else
+             s->f_code=1;
+     
          if (s->pict_type == B_TYPE) {
              s->b_code = get_bits(&s->gb, 3);
-//printf("b-code %d\n", s->b_code);
-         }
-//printf("quant:%d fcode:%d\n", s->qscale, s->f_code);
-
+         }else
+             s->b_code=1;
+#if 0
+printf("qp:%d fc:%d bc:%d type:%s size:%d pro:%d alt:%d top:%d qpel:%d part:%d resync:%d\n", 
+    s->qscale, s->f_code, s->b_code, 
+    s->pict_type == I_TYPE ? "I" : (s->pict_type == P_TYPE ? "P" : (s->pict_type == B_TYPE ? "B" : "S")), 
+    s->gb.size,s->progressive_sequence, s->alternate_scan, s->top_field_first, 
+    s->quarter_sample, s->data_partitioning, s->resync_marker); 
+#endif
          if(!s->scalability){
              if (s->shape!=RECT_SHAPE && s->pict_type!=I_TYPE) {
                  skip_bits1(&s->gb); // vop shape coding type
              }
+         }else{
+             if(s->enhancement_type){
+                 int load_backward_shape= get_bits1(&s->gb);
+                 if(load_backward_shape){
+                     printf("load backward shape isnt supported\n");
+                 }
+             }
+             skip_bits(&s->gb, 2); //ref_select_code
          }
      }
+     /* detect buggy encoders which don't set the low_delay flag (divx4/xvid/opendivx) */
+     // note: we cannot easily detect divx5 without b-frames (although it's buggy too)
+     if(s->vo_type==0 && s->vol_control_parameters==0 && s->divx_version==0 && s->picture_number==0){
+         printf("looks like this file was encoded with (divx4/(old)xvid/opendivx) -> forcing low_delay flag\n");
+         s->low_delay=1;
+     }
+
      s->picture_number++; // better than pic number==0 allways ;)
+
+     s->y_dc_scale_table= ff_mpeg4_y_dc_scale_table; //FIXME add short header support 
+     s->c_dc_scale_table= ff_mpeg4_c_dc_scale_table;
+
+     if(s->divx_version==0 || s->divx_version < 500){
+         s->h_edge_pos= s->width;
+         s->v_edge_pos= s->height;
+     }
      return 0;
 }
 
@@ -2370,22 +4374,29 @@ int intel_h263_decode_picture_header(MpegEncContext *s)
     int format;
 
     /* picture header */
-    if (get_bits(&s->gb, 22) != 0x20)
+    if (get_bits(&s->gb, 22) != 0x20) {
+        fprintf(stderr, "Bad picture start code\n");
         return -1;
-    skip_bits(&s->gb, 8); /* picture timestamp */
+    }
+    s->picture_number = get_bits(&s->gb, 8); /* picture timestamp */
 
-    if (get_bits1(&s->gb) != 1)
+    if (get_bits1(&s->gb) != 1) {
+        fprintf(stderr, "Bad marker\n");
         return -1;     /* marker */
-    if (get_bits1(&s->gb) != 0)
+    }
+    if (get_bits1(&s->gb) != 0) {
+        fprintf(stderr, "Bad H263 id\n");
         return -1;     /* h263 id */
+    }
     skip_bits1(&s->gb);        /* split screen off */
     skip_bits1(&s->gb);        /* camera  off */
     skip_bits1(&s->gb);        /* freeze picture release off */
 
     format = get_bits(&s->gb, 3);
-    if (format != 7)
+    if (format != 7) {
+        fprintf(stderr, "Intel H263 free format not supported\n");
         return -1;
-
+    }
     s->h263_plus = 0;
 
     s->pict_type = I_TYPE + get_bits1(&s->gb);
@@ -2393,12 +4404,18 @@ int intel_h263_decode_picture_header(MpegEncContext *s)
     s->unrestricted_mv = get_bits1(&s->gb); 
     s->h263_long_vectors = s->unrestricted_mv;
 
-    if (get_bits1(&s->gb) != 0)
+    if (get_bits1(&s->gb) != 0) {
+        fprintf(stderr, "SAC not supported\n");
         return -1;     /* SAC: off */
-    if (get_bits1(&s->gb) != 0)
+    }
+    if (get_bits1(&s->gb) != 0) {
+        fprintf(stderr, "Advanced Prediction Mode not supported\n");
         return -1;     /* advanced prediction mode: off */
-    if (get_bits1(&s->gb) != 0)
-        return -1;     /* not PB frame */
+    }
+    if (get_bits1(&s->gb) != 0) {
+        fprintf(stderr, "PB frame mode no supported\n");
+        return -1;     /* PB frame mode */
+    }
 
     /* skip unknown header garbage */
     skip_bits(&s->gb, 41);