]> git.sesse.net Git - ffmpeg/blobdiff - libavcodec/h263.c
* Ogg/Vorbis patch by Mark Hills
[ffmpeg] / libavcodec / h263.c
index 378ad2a090891059dadd71355e7a460f00c94756..8bb23eac5f01e5e85cd540c94561694c5fd19383 100644 (file)
@@ -18,7 +18,9 @@
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  *
- * ac prediction encoding & b-frame support by Michael Niedermayer <michaelni@gmx.at>
+ * ac prediction encoding, b-frame support, error resilience, optimizations,
+ * qpel decoding, gmc decoding, interlaced decoding, 
+ * by Michael Niedermayer <michaelni@gmx.at>
  */
  
 //#define DEBUG
 #include "h263data.h"
 #include "mpeg4data.h"
 
-//rounded divison & shift
-#define RSHIFT(a,b) ((a) > 0 ? ((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b))
+//#undef NDEBUG
+//#include <assert.h>
 
+#if 1
 #define PRINT_MB_TYPE(a) {}
-//#define PRINT_MB_TYPE(a) printf(a)
+#else
+#define PRINT_MB_TYPE(a) printf(a)
+#endif
 
 #define INTRA_MCBPC_VLC_BITS 6
 #define INTER_MCBPC_VLC_BITS 6
@@ -44,6 +49,7 @@
 #define MB_TYPE_B_VLC_BITS 4
 #define TEX_VLC_BITS 9
 
+#ifdef CONFIG_ENCODERS
 static void h263_encode_block(MpegEncContext * s, DCTELEM * block,
                              int n);
 static void h263_encode_motion(MpegEncContext * s, int val, int fcode);
@@ -51,6 +57,8 @@ static void h263p_encode_umotion(MpegEncContext * s, int val);
 static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block,
                               int n, int dc, UINT8 *scan_table, 
                                PutBitContext *dc_pb, PutBitContext *ac_pb);
+#endif
+
 static int h263_decode_motion(MpegEncContext * s, int pred, int fcode);
 static int h263p_decode_umotion(MpegEncContext * s, int pred);
 static int h263_decode_block(MpegEncContext * s, DCTELEM * block,
@@ -62,15 +70,18 @@ static int h263_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr);
 static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n,
                               int dir);
 static void mpeg4_decode_sprite_trajectory(MpegEncContext * s);
+static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *dir_ptr);
 
 extern UINT32 inverse[256];
 
-static UINT16 mv_penalty[MAX_FCODE+1][MAX_MV*2+1];
+static UINT16 uni_DCtab_lum  [512][2];
+static UINT16 uni_DCtab_chrom[512][2];
+
+#ifdef CONFIG_ENCODERS
+static UINT16 (*mv_penalty)[MAX_MV*2+1]= NULL;
 static UINT8 fcode_tab[MAX_MV*2+1];
 static UINT8 umv_fcode_tab[MAX_MV*2+1];
 
-static UINT16 uni_DCtab_lum  [512][2];
-static UINT16 uni_DCtab_chrom[512][2];
 static UINT32 uni_mpeg4_intra_rl_bits[64*64*2*2];
 static UINT8  uni_mpeg4_intra_rl_len [64*64*2*2];
 static UINT32 uni_mpeg4_inter_rl_bits[64*64*2*2];
@@ -87,6 +98,8 @@ intra
 max level: 53/16
 max run: 29/41
 */
+#endif
+
 
 int h263_get_picture_format(int width, int height)
 {
@@ -107,6 +120,26 @@ int h263_get_picture_format(int width, int height)
     return format;
 }
 
+/**
+ * Fills in s->aspected_width, s->aspected_height and s->aspect_ratio_info
+ * from s->avctx->aspect_ratio.
+ * An aspect ratio of 0 is treated as 1.0 (square pixels).
+ * The resulting fraction is mapped to FF_ASPECT_4_3_625, FF_ASPECT_16_9_625
+ * or FF_ASPECT_SQUARE when it matches exactly, otherwise FF_ASPECT_EXTENDED
+ * is selected and the callers write aspected_width/aspected_height explicitly.
+ */
+static void init_aspect_info(MpegEncContext * s){
+    double aspect;
+    
+    emms_c(); //paranoia ;) (presumably resets MMX state before the floating point code below)
+    
+    /* the else is required: without it the 1.0 default was unconditionally
+       overwritten and an unset (0) aspect ratio reached ff_float2fraction() */
+    if(s->avctx->aspect_ratio==0) aspect= 1.0;
+    else                          aspect= s->avctx->aspect_ratio;
+    
+    /* presumably reduces aspect to a fraction whose terms fit the 8 bit fields (max 255) */
+    ff_float2fraction(&s->aspected_width, &s->aspected_height, aspect, 255);
+    
+    if(s->aspected_width == 4 && s->aspected_height == 3)
+        s->aspect_ratio_info= FF_ASPECT_4_3_625;
+    else if(s->aspected_width == 16 && s->aspected_height == 9)
+        s->aspect_ratio_info= FF_ASPECT_16_9_625;
+    else if(s->aspected_width == 1 && s->aspected_height == 1)
+        s->aspect_ratio_info= FF_ASPECT_SQUARE;
+    else
+        s->aspect_ratio_info= FF_ASPECT_EXTENDED;
+}
+
 void h263_encode_picture_header(MpegEncContext * s, int picture_number)
 {
     int format;
@@ -183,14 +216,17 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
                
                if (format == 7) {
             /* Custom Picture Format (CPFMT) */
-               
-           if (s->aspect_ratio_info)
+            init_aspect_info(s);
+
             put_bits(&s->pb,4,s->aspect_ratio_info);
-           else
-            put_bits(&s->pb,4,2); /* Aspect ratio: CIF 12:11 (4:3) picture */
             put_bits(&s->pb,9,(s->width >> 2) - 1);
             put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */
             put_bits(&s->pb,9,(s->height >> 2));
+           if (s->aspect_ratio_info == FF_ASPECT_EXTENDED)
+           {
+               put_bits(&s->pb, 8, s->aspected_width);
+               put_bits(&s->pb, 8, s->aspected_height);
+           }
         }
         
         /* Unlimited Unrestricted Motion Vectors Indicator (UUI) */
@@ -210,51 +246,25 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
     }
 }
 
+/**
+ * Encodes a group of blocks header.
+ */
 int h263_encode_gob_header(MpegEncContext * s, int mb_line)
 {
-    int pdif=0;
-    
-    /* Check to see if we need to put a new GBSC */
-    /* for RTP packetization                    */
-    if (s->rtp_mode) {
-        pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
-        if (pdif >= s->rtp_payload_size) {
-            /* Bad luck, packet must be cut before */
-            align_put_bits(&s->pb);
-            flush_put_bits(&s->pb);
-            /* Call the RTP callback to send the last GOB */
-            if (s->rtp_callback) {
-                pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
-                s->rtp_callback(s->ptr_lastgob, pdif, s->gob_number);
-            }
-            s->ptr_lastgob = pbBufPtr(&s->pb);
-            put_bits(&s->pb, 17, 1); /* GBSC */
-            s->gob_number = mb_line / s->gob_index;
-            put_bits(&s->pb, 5, s->gob_number); /* GN */
-            put_bits(&s->pb, 2, s->pict_type == I_TYPE); /* GFID */
-            put_bits(&s->pb, 5, s->qscale); /* GQUANT */
-            //fprintf(stderr,"\nGOB: %2d size: %d", s->gob_number - 1, pdif);
-            return pdif;
-       } else if (pdif + s->mb_line_avgsize >= s->rtp_payload_size) {
-           /* Cut the packet before we can't */
            align_put_bits(&s->pb);
            flush_put_bits(&s->pb);
            /* Call the RTP callback to send the last GOB */
            if (s->rtp_callback) {
-               pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
+               int pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
                s->rtp_callback(s->ptr_lastgob, pdif, s->gob_number);
            }
-           s->ptr_lastgob = pbBufPtr(&s->pb);
            put_bits(&s->pb, 17, 1); /* GBSC */
            s->gob_number = mb_line / s->gob_index;
            put_bits(&s->pb, 5, s->gob_number); /* GN */
            put_bits(&s->pb, 2, s->pict_type == I_TYPE); /* GFID */
            put_bits(&s->pb, 5, s->qscale); /* GQUANT */
            //fprintf(stderr,"\nGOB: %2d size: %d", s->gob_number - 1, pdif);
-           return pdif;
-       }
-   }
-   return 0;
+    return 0;
 }
 
 static inline int decide_ac_pred(MpegEncContext * s, DCTELEM block[6][64], int dir[6])
@@ -268,22 +278,50 @@ static inline int decide_ac_pred(MpegEncContext * s, DCTELEM block[6][64], int d
         ac_val = s->ac_val[0][0] + s->block_index[n] * 16;
         ac_val1= ac_val;
         if(dir[n]){
+            const int xy= s->mb_x + s->mb_y*s->mb_width - s->mb_width;
+            /* top prediction */
             ac_val-= s->block_wrap[n]*16;
-            for(i=1; i<8; i++){
-                const int level= block[n][block_permute_op(i   )];
-                score0+= ABS(level);
-                score1+= ABS(level - ac_val[i+8]);
-                ac_val1[i  ]=    block[n][block_permute_op(i<<3)];
-                ac_val1[i+8]= level;
+            if(s->mb_y==0 || s->qscale == s->qscale_table[xy] || n==2 || n==3){
+                /* same qscale */
+                for(i=1; i<8; i++){
+                    const int level= block[n][s->idct_permutation[i   ]];
+                    score0+= ABS(level);
+                    score1+= ABS(level - ac_val[i+8]);
+                    ac_val1[i  ]=    block[n][s->idct_permutation[i<<3]];
+                    ac_val1[i+8]= level;
+                }
+            }else{
+                /* different qscale, we must rescale */
+                for(i=1; i<8; i++){
+                    const int level= block[n][s->idct_permutation[i   ]];
+                    score0+= ABS(level);
+                    score1+= ABS(level - ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale));
+                    ac_val1[i  ]=    block[n][s->idct_permutation[i<<3]];
+                    ac_val1[i+8]= level;
+                }
             }
         }else{
+            const int xy= s->mb_x-1 + s->mb_y*s->mb_width;
+            /* left prediction */
             ac_val-= 16;
-            for(i=1; i<8; i++){
-                const int level= block[n][block_permute_op(i<<3)];
-                score0+= ABS(level);
-                score1+= ABS(level - ac_val[i]);
-                ac_val1[i  ]= level;
-                ac_val1[i+8]=    block[n][block_permute_op(i   )];
+            if(s->mb_x==0 || s->qscale == s->qscale_table[xy] || n==1 || n==3){
+                /* same qscale */
+                for(i=1; i<8; i++){
+                    const int level= block[n][s->idct_permutation[i<<3]];
+                    score0+= ABS(level);
+                    score1+= ABS(level - ac_val[i]);
+                    ac_val1[i  ]= level;
+                    ac_val1[i+8]=    block[n][s->idct_permutation[i   ]];
+                }
+            }else{
+                /* different qscale, we must rescale */
+                for(i=1; i<8; i++){
+                    const int level= block[n][s->idct_permutation[i<<3]];
+                    score0+= ABS(level);
+                    score1+= ABS(level - ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale));
+                    ac_val1[i  ]= level;
+                    ac_val1[i+8]=    block[n][s->idct_permutation[i   ]];
+                }
             }
         }
     }
@@ -291,6 +329,66 @@ static inline int decide_ac_pred(MpegEncContext * s, DCTELEM block[6][64], int d
     return score0 > score1 ? 1 : 0;    
 }
 
+/**
+ * modify qscale so that encoding is actually possible in h263
+ * (limit the difference between consecutive macroblocks to -2..2).
+ * Entries of s->qscale_table are only ever lowered, never raised.
+ */
+void ff_clean_h263_qscales(MpegEncContext *s){
+    int mb;
+    
+    /* forward pass: no entry may exceed its predecessor by more than 2 */
+    mb= 1;
+    while(mb < s->mb_num){
+        if(s->qscale_table[mb] - s->qscale_table[mb-1] >2)
+            s->qscale_table[mb]= s->qscale_table[mb-1]+2;
+        mb++;
+    }
+    
+    /* backward pass: no entry may exceed its successor by more than 2 */
+    mb= s->mb_num-2;
+    while(mb >= 0){
+        if(s->qscale_table[mb] - s->qscale_table[mb+1] >2)
+            s->qscale_table[mb]= s->qscale_table[mb+1]+2;
+        mb--;
+    }
+}
+
+/**
+ * modify mb_type & qscale so that encoding is actually possible in mpeg4.
+ * First applies ff_clean_h263_qscales(), then:
+ *  - turns MB_TYPE_INTER4V macroblocks whose qscale differs from the previous
+ *    macroblock into MB_TYPE_INTER,
+ *  - for B frames, forces all qscales to the same parity (the majority one)
+ *    and turns MB_TYPE_DIRECT macroblocks whose qscale differs from the
+ *    previous macroblock into MB_TYPE_BIDIR.
+ */
+void ff_clean_mpeg4_qscales(MpegEncContext *s){
+    int i;
+    
+    ff_clean_h263_qscales(s);
+    
+    for(i=1; i<s->mb_num; i++){
+        if(s->qscale_table[i] != s->qscale_table[i-1] && (s->mb_type[i]&MB_TYPE_INTER4V)){
+            s->mb_type[i]&= ~MB_TYPE_INTER4V;
+            s->mb_type[i]|= MB_TYPE_INTER;
+        }
+    }
+
+    if(s->pict_type== B_TYPE){
+        int odd=0;
+        /* ok, come on, this isnt funny anymore, theres more code for handling this mpeg4 mess than
+           for the actual adaptive quantization */
+        
+        /* count the odd qscales to find the majority parity */
+        for(i=0; i<s->mb_num; i++){
+            odd += s->qscale_table[i]&1;
+        }
+        
+        if(2*odd > s->mb_num) odd=1;
+        else                  odd=0;
+        
+        /* the B frame encoder can only code even qscale differences (see the
+           asserts in mpeg4_encode_mb), so every entry must get the same parity */
+        for(i=0; i<s->mb_num; i++){
+            if((s->qscale_table[i]&1) != odd)
+                s->qscale_table[i]++;
+            /* clamp to the largest legal value of the wanted parity; clamping
+               an even-parity table to 31 would reintroduce an odd qscale */
+            if(s->qscale_table[i] > 31)
+                s->qscale_table[i]= odd ? 31 : 30;
+        }            
+    
+        for(i=1; i<s->mb_num; i++){
+            if(s->qscale_table[i] != s->qscale_table[i-1] && (s->mb_type[i]&MB_TYPE_DIRECT)){
+                s->mb_type[i]&= ~MB_TYPE_DIRECT;
+                s->mb_type[i]|= MB_TYPE_BIDIR;
+            }
+        }
+    }
+}
+
+#ifdef CONFIG_ENCODERS
 void mpeg4_encode_mb(MpegEncContext * s,
                    DCTELEM block[6][64],
                    int motion_x, int motion_y)
@@ -301,6 +399,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
     PutBitContext * const tex_pb = s->data_partitioning && s->pict_type!=B_TYPE ? &s->tex_pb : &s->pb;
     PutBitContext * const dc_pb  = s->data_partitioning && s->pict_type!=I_TYPE ? &s->pb2    : &s->pb;
     const int interleaved_stats= (s->flags&CODEC_FLAG_PASS1) && !s->data_partitioning ? 1 : 0;
+    const int dquant_code[5]= {1,0,9,2,3};
     
     //    printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
     if (!s->mb_intra) {
@@ -321,20 +420,27 @@ void mpeg4_encode_mb(MpegEncContext * s,
                 s->last_mv[1][0][0]= 
                 s->last_mv[1][0][1]= 0;
             }
+            
+            assert(s->dquant>=-2 && s->dquant<=2);
+            assert((s->dquant&1)==0);
+            assert(mb_type>=0);
 
             /* nothing to do if this MB was skiped in the next P Frame */
-            if(s->mbskip_table[s->mb_y * s->mb_width + s->mb_x]){
+            if(s->mbskip_table[s->mb_y * s->mb_width + s->mb_x]){ //FIXME avoid DCT & ...
                 s->skip_count++;
                 s->mv[0][0][0]= 
                 s->mv[0][0][1]= 
                 s->mv[1][0][0]= 
                 s->mv[1][0][1]= 0;
                 s->mv_dir= MV_DIR_FORWARD; //doesnt matter
+                s->qscale -= s->dquant;
                 return;
             }
 
             if ((cbp | motion_x | motion_y | mb_type) ==0) {
                 /* direct MB with MV={0,0} */
+                assert(s->dquant==0);
+                
                 put_bits(&s->pb, 1, 1); /* mb not coded modb1=1 */
 
                 if(interleaved_stats){
@@ -349,8 +455,20 @@ void mpeg4_encode_mb(MpegEncContext * s,
             put_bits(&s->pb, mb_type+1, 1); // this table is so simple that we dont need it :)
             if(cbp) put_bits(&s->pb, 6, cbp);
             
-            if(cbp && mb_type)
-                put_bits(&s->pb, 1, 0); /* no q-scale change */
+            if(cbp && mb_type){
+                if(s->dquant)
+                    put_bits(&s->pb, 2, (s->dquant>>2)+3);
+                else
+                    put_bits(&s->pb, 1, 0);
+            }else
+                s->qscale -= s->dquant;
+            
+            if(!s->progressive_sequence){
+                if(cbp)
+                    put_bits(&s->pb, 1, s->interlaced_dct);
+                if(mb_type) // not diect mode
+                    put_bits(&s->pb, 1, 0); // no interlaced ME yet
+            }
 
             if(interleaved_stats){
                 bits= get_bit_count(&s->pb);
@@ -363,6 +481,8 @@ void mpeg4_encode_mb(MpegEncContext * s,
             case 0: /* direct */
                 h263_encode_motion(s, motion_x, 1);
                 h263_encode_motion(s, motion_y, 1);                
+                s->b_count++;
+                s->f_count++;
                 break;
             case 1: /* bidir */
                 h263_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code);
@@ -373,18 +493,22 @@ void mpeg4_encode_mb(MpegEncContext * s,
                 s->last_mv[0][0][1]= s->mv[0][0][1];
                 s->last_mv[1][0][0]= s->mv[1][0][0];
                 s->last_mv[1][0][1]= s->mv[1][0][1];
+                s->b_count++;
+                s->f_count++;
                 break;
             case 2: /* backward */
                 h263_encode_motion(s, motion_x - s->last_mv[1][0][0], s->b_code);
                 h263_encode_motion(s, motion_y - s->last_mv[1][0][1], s->b_code);
                 s->last_mv[1][0][0]= motion_x;
                 s->last_mv[1][0][1]= motion_y;
+                s->b_count++;
                 break;
             case 3: /* forward */
                 h263_encode_motion(s, motion_x - s->last_mv[0][0][0], s->f_code);
                 h263_encode_motion(s, motion_y - s->last_mv[0][0][1], s->f_code);
                 s->last_mv[0][0][0]= motion_x;
                 s->last_mv[0][0][1]= motion_y;
+                s->f_count++;
                 break;
             default:
                 printf("unknown mb type\n");
@@ -399,7 +523,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
 
             /* encode each block */
             for (i = 0; i < 6; i++) {
-                mpeg4_encode_block(s, block[i], i, 0, zigzag_direct, NULL, &s->pb);
+                mpeg4_encode_block(s, block[i], i, 0, s->intra_scantable.permutated, NULL, &s->pb);
             }
 
             if(interleaved_stats){
@@ -408,7 +532,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
                 s->last_bits=bits;
             }
         }else{ /* s->pict_type==B_TYPE */
-            if ((cbp | motion_x | motion_y) == 0 && s->mv_type==MV_TYPE_16X16) {
+            if ((cbp | motion_x | motion_y | s->dquant) == 0 && s->mv_type==MV_TYPE_16X16) {
                 /* check if the B frames can skip it too, as we must skip it if we skip here 
                    why didnt they just compress the skip-mb bits instead of reusing them ?! */
                 if(s->max_b_frames>0){
@@ -432,7 +556,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
                         if(s->coded_order[i+1].pict_type!=B_TYPE) break;
 
                         b_pic= s->coded_order[i+1].picture[0] + offset;
-                        diff= pix_abs16x16(p_pic, b_pic, s->linesize);
+                       diff= s->dsp.pix_abs16x16(p_pic, b_pic, s->linesize);
                         if(diff>s->qscale*70){ //FIXME check that 70 is optimal
                             s->mb_skiped=0;
                             break;
@@ -457,12 +581,22 @@ void mpeg4_encode_mb(MpegEncContext * s,
             put_bits(&s->pb, 1, 0);    /* mb coded */
             if(s->mv_type==MV_TYPE_16X16){
                 cbpc = cbp & 3;
+                if(s->dquant) cbpc+= 8;
                 put_bits(&s->pb,
                         inter_MCBPC_bits[cbpc],
                         inter_MCBPC_code[cbpc]);
+
                 cbpy = cbp >> 2;
                 cbpy ^= 0xf;
                 put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+                if(s->dquant)
+                    put_bits(pb2, 2, dquant_code[s->dquant+2]);
+
+                if(!s->progressive_sequence){
+                    if(cbp)
+                        put_bits(pb2, 1, s->interlaced_dct);
+                    put_bits(pb2, 1, 0); // no interlaced ME yet
+                }
                     
                 if(interleaved_stats){
                     bits= get_bit_count(&s->pb);
@@ -484,6 +618,11 @@ void mpeg4_encode_mb(MpegEncContext * s,
                 cbpy ^= 0xf;
                 put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
 
+                if(!s->progressive_sequence){
+                    if(cbp)
+                        put_bits(pb2, 1, s->interlaced_dct);
+                }
+    
                 if(interleaved_stats){
                     bits= get_bit_count(&s->pb);
                     s->misc_bits+= bits - s->last_bits;
@@ -507,7 +646,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
 
             /* encode each block */
             for (i = 0; i < 6; i++) {
-                mpeg4_encode_block(s, block[i], i, 0, zigzag_direct, NULL, tex_pb);
+                mpeg4_encode_block(s, block[i], i, 0, s->intra_scantable.permutated, NULL, tex_pb);
             }
 
             if(interleaved_stats){
@@ -515,7 +654,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
                 s->p_tex_bits+= bits - s->last_bits;
                 s->last_bits=bits;
             }
-            s->p_count++;
+            s->f_count++;
         }
     } else {
         int cbp;
@@ -544,8 +683,8 @@ void mpeg4_encode_mb(MpegEncContext * s,
                 int last_index;
 
                 mpeg4_inv_pred_ac(s, block[i], i, dir[i]);
-                if (dir[i]==0) st = ff_alternate_vertical_scan; /* left */
-                else           st = ff_alternate_horizontal_scan; /* top */
+                if (dir[i]==0) st = s->intra_v_scantable.permutated; /* left */
+                else           st = s->intra_h_scantable.permutated; /* top */
 
                 for(last_index=63; last_index>=0; last_index--) //FIXME optimize
                     if(block[i][st[last_index]]) break;
@@ -555,7 +694,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
             }
         }else{
             for(i=0; i<6; i++)
-                scan_table[i]= zigzag_direct;
+                scan_table[i]= s->intra_scantable.permutated;
         }
 
         /* compute cbp */
@@ -567,10 +706,12 @@ void mpeg4_encode_mb(MpegEncContext * s,
 
         cbpc = cbp & 3;
         if (s->pict_type == I_TYPE) {
+            if(s->dquant) cbpc+=4;
             put_bits(&s->pb,
                 intra_MCBPC_bits[cbpc],
                 intra_MCBPC_code[cbpc]);
         } else {
+            if(s->dquant) cbpc+=8;
             put_bits(&s->pb, 1, 0);    /* mb coded */
             put_bits(&s->pb,
                 inter_MCBPC_bits[cbpc + 4],
@@ -579,6 +720,12 @@ void mpeg4_encode_mb(MpegEncContext * s,
         put_bits(pb2, 1, s->ac_pred);
         cbpy = cbp >> 2;
         put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+        if(s->dquant)
+            put_bits(dc_pb, 2, dquant_code[s->dquant+2]);
+
+        if(!s->progressive_sequence){
+            put_bits(dc_pb, 1, s->interlaced_dct);
+        }
 
         if(interleaved_stats){
             bits= get_bit_count(&s->pb);
@@ -608,10 +755,10 @@ void mpeg4_encode_mb(MpegEncContext * s,
 
                 if(dir[i]){
                     for(j=1; j<8; j++) 
-                        block[i][block_permute_op(j   )]= ac_val[j+8];
+                        block[i][s->idct_permutation[j   ]]= ac_val[j+8];
                 }else{
                     for(j=1; j<8; j++) 
-                        block[i][block_permute_op(j<<3)]= ac_val[j  ];
+                        block[i][s->idct_permutation[j<<3]]= ac_val[j  ];
                 }
                 s->block_last_index[i]= zigzag_last_index[i];
             }
@@ -627,6 +774,7 @@ void h263_encode_mb(MpegEncContext * s,
     INT16 pred_dc;
     INT16 rec_intradc[6];
     UINT16 *dc_ptr[6];
+    const int dquant_code[5]= {1,0,9,2,3};
            
     //printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
     if (!s->mb_intra) {
@@ -636,19 +784,22 @@ void h263_encode_mb(MpegEncContext * s,
             if (s->block_last_index[i] >= 0)
                 cbp |= 1 << (5 - i);
         }
-        if ((cbp | motion_x | motion_y) == 0) {
+        if ((cbp | motion_x | motion_y | s->dquant) == 0) {
             /* skip macroblock */
             put_bits(&s->pb, 1, 1);
             return;
         }
         put_bits(&s->pb, 1, 0);        /* mb coded */
         cbpc = cbp & 3;
+        if(s->dquant) cbpc+= 8;
         put_bits(&s->pb,
                    inter_MCBPC_bits[cbpc],
                    inter_MCBPC_code[cbpc]);
         cbpy = cbp >> 2;
         cbpy ^= 0xf;
         put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+        if(s->dquant)
+            put_bits(&s->pb, 2, dquant_code[s->dquant+2]);
 
         /* motion vectors: 16x16 mode only now */
         h263_pred_motion(s, 0, &pred_x, &pred_y);
@@ -712,10 +863,12 @@ void h263_encode_mb(MpegEncContext * s,
 
         cbpc = cbp & 3;
         if (s->pict_type == I_TYPE) {
+            if(s->dquant) cbpc+=4;
             put_bits(&s->pb,
                 intra_MCBPC_bits[cbpc],
                 intra_MCBPC_code[cbpc]);
         } else {
+            if(s->dquant) cbpc+=8;
             put_bits(&s->pb, 1, 0);    /* mb coded */
             put_bits(&s->pb,
                 inter_MCBPC_bits[cbpc + 4],
@@ -727,6 +880,8 @@ void h263_encode_mb(MpegEncContext * s,
         }
         cbpy = cbp >> 2;
         put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+        if(s->dquant)
+            put_bits(&s->pb, 2, dquant_code[s->dquant+2]);
     }
 
     for(i=0; i<6; i++) {
@@ -740,6 +895,7 @@ void h263_encode_mb(MpegEncContext * s,
         }
     }
 }
+#endif
 
 static int h263_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr)
 {
@@ -828,7 +984,7 @@ void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n)
             if (a != 1024) {
                 ac_val -= 16;
                 for(i=1;i<8;i++) {
-                    block[block_permute_op(i*8)] += ac_val[i];
+                    block[s->idct_permutation[i<<3]] += ac_val[i];
                 }
                 pred_dc = a;
             }
@@ -837,7 +993,7 @@ void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n)
             if (c != 1024) {
                 ac_val -= 16 * wrap;
                 for(i=1;i<8;i++) {
-                    block[block_permute_op(i)] += ac_val[i + 8];
+                    block[s->idct_permutation[i   ]] += ac_val[i + 8];
                 }
                 pred_dc = c;
             }
@@ -865,10 +1021,10 @@ void h263_pred_acdc(MpegEncContext * s, INT16 *block, int n)
     
     /* left copy */
     for(i=1;i<8;i++)
-        ac_val1[i] = block[block_permute_op(i * 8)];
+        ac_val1[i    ] = block[s->idct_permutation[i<<3]];
     /* top copy */
     for(i=1;i<8;i++)
-        ac_val1[8 + i] = block[block_permute_op(i)];
+        ac_val1[8 + i] = block[s->idct_permutation[i   ]];
 }
 
 INT16 *h263_pred_motion(MpegEncContext * s, int block, 
@@ -885,7 +1041,7 @@ INT16 *h263_pred_motion(MpegEncContext * s, int block,
 
     A = s->motion_val[xy - 1];
     /* special case for first (slice) line */
-    if ((s->mb_y == 0 || s->first_slice_line) && block<3) {
+    if (s->first_slice_line && block<3) {
         // we cant just change some MVs to simulate that as we need them for the B frames (and ME)
         // and if we ever support non rectangular objects than we need to do a few ifs here anyway :(
         if(block==0){ //most common case
@@ -931,9 +1087,10 @@ INT16 *h263_pred_motion(MpegEncContext * s, int block,
     return mot_val;
 }
 
+#ifdef CONFIG_ENCODERS
 static void h263_encode_motion(MpegEncContext * s, int val, int f_code)
 {
-    int range, l, m, bit_size, sign, code, bits;
+    int range, l, bit_size, sign, code, bits;
 
     if (val == 0) {
         /* zero vector */
@@ -944,19 +1101,29 @@ static void h263_encode_motion(MpegEncContext * s, int val, int f_code)
         range = 1 << bit_size;
         /* modulo encoding */
         l = range * 32;
-        m = 2 * l;
+#if 1
+        val+= l;
+        val&= 2*l-1;
+        val-= l;
+        sign = val>>31;
+        val= (val^sign)-sign;
+        sign&=1;
+#else
         if (val < -l) {
-            val += m;
+            val += 2*l;
         } else if (val >= l) {
-            val -= m;
+            val -= 2*l;
         }
 
+        assert(val>=-l && val<l);
+
         if (val >= 0) {
             sign = 0;
         } else {
             val = -val;
             sign = 1;
         }
+#endif
         val--;
         code = (val >> bit_size) + 1;
         bits = val & (range - 1);
@@ -966,6 +1133,7 @@ static void h263_encode_motion(MpegEncContext * s, int val, int f_code)
             put_bits(&s->pb, bit_size, bits);
         }
     }
+
 }
 
 /* Encode MV differences on H.263+ with Unrestricted MV mode */
@@ -1011,6 +1179,10 @@ static void init_mv_penalty_and_fcode(MpegEncContext *s)
 {
     int f_code;
     int mv;
+    
+    if(mv_penalty==NULL)
+        mv_penalty= av_mallocz( sizeof(UINT16)*(MAX_FCODE+1)*(2*MAX_MV+1) );
+    
     for(f_code=1; f_code<=MAX_FCODE; f_code++){
         for(mv=-MAX_MV; mv<=MAX_MV; mv++){
             int len;
@@ -1048,6 +1220,7 @@ static void init_mv_penalty_and_fcode(MpegEncContext *s)
         umv_fcode_tab[mv]= 1;
     }
 }
+#endif
 
 static void init_uni_dc_tab(void)
 {
@@ -1101,6 +1274,7 @@ static void init_uni_dc_tab(void)
     }
 }
 
+#ifdef CONFIG_ENCODERS
 static void init_uni_mpeg4_rl_tab(RLTable *rl, UINT32 *bits_tab, UINT8 *len_tab){
     int slevel, run, last;
     
@@ -1269,7 +1443,7 @@ static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
     last_index = s->block_last_index[n];
     last_non_zero = i - 1;
     for (; i <= last_index; i++) {
-        j = zigzag_direct[i];
+        j = s->intra_scantable.permutated[i];
         level = block[j];
         if (level) {
             run = i - last_non_zero - 1;
@@ -1293,9 +1467,12 @@ static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
            }
     }
 }
+#endif
 
 /***************************************************/
-
+/**
+ * add mpeg4 stuffing bits (01...1)
+ */
 void ff_mpeg4_stuffing(PutBitContext * pbc)
 {
     int length;
@@ -1314,13 +1491,16 @@ void ff_set_mpeg4_time(MpegEncContext * s, int picture_number){
 
         s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1;
     }
-
-    s->time= picture_number*(INT64)FRAME_RATE_BASE*s->time_increment_resolution/s->frame_rate;
+    
+    if(s->avctx->pts)
+        s->time= (s->avctx->pts*s->time_increment_resolution + 500*1000)/(1000*1000);
+    else
+        s->time= picture_number*(INT64)FRAME_RATE_BASE*s->time_increment_resolution/s->frame_rate;
     time_div= s->time/s->time_increment_resolution;
     time_mod= s->time%s->time_increment_resolution;
 
     if(s->pict_type==B_TYPE){
-        s->bp_time= s->last_non_b_time - s->time;
+        s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
     }else{
         s->last_time_base= s->time_base;
         s->time_base= time_div;
@@ -1346,10 +1526,15 @@ static void mpeg4_encode_vol_header(MpegEncContext * s)
     put_bits(&s->pb, 1, 1);            /* is obj layer id= yes */
       put_bits(&s->pb, 4, vo_ver_id);  /* is obj layer ver id */
       put_bits(&s->pb, 3, 1);          /* is obj layer priority */
-    if(s->aspect_ratio_info) 
-        put_bits(&s->pb, 4, s->aspect_ratio_info);/* aspect ratio info */
-    else
-        put_bits(&s->pb, 4, 1);                /* aspect ratio info= sqare pixel */
+    
+    init_aspect_info(s);
+
+    put_bits(&s->pb, 4, s->aspect_ratio_info);/* aspect ratio info */
+    if (s->aspect_ratio_info == FF_ASPECT_EXTENDED)
+    {
+       put_bits(&s->pb, 8, s->aspected_width);
+       put_bits(&s->pb, 8, s->aspected_height);
+    }
 
     if(s->low_delay){
         put_bits(&s->pb, 1, 1);                /* vol control parameters= yes */
@@ -1373,14 +1558,16 @@ static void mpeg4_encode_vol_header(MpegEncContext * s)
     put_bits(&s->pb, 1, 1);            /* marker bit */
     put_bits(&s->pb, 13, s->height);   /* vol height */
     put_bits(&s->pb, 1, 1);            /* marker bit */
-    put_bits(&s->pb, 1, 0);            /* interlace */
+    put_bits(&s->pb, 1, s->progressive_sequence ? 0 : 1);
     put_bits(&s->pb, 1, 1);            /* obmc disable */
     if (vo_ver_id == 1) {
         put_bits(&s->pb, 1, s->vol_sprite_usage=0);            /* sprite enable */
     }else{ /* vo_ver_id == 2 */
         put_bits(&s->pb, 2, s->vol_sprite_usage=0);            /* sprite enable */
     }
-    put_bits(&s->pb, 1, 0);            /* not 8 bit */
+    
+    s->quant_precision=5;
+    put_bits(&s->pb, 1, 0);            /* not 8 bit == false */
     put_bits(&s->pb, 1, s->mpeg_quant);        /* quant type= (0=h263 style)*/
     if(s->mpeg_quant) put_bits(&s->pb, 2, 0); /* no custom matrixes */
 
@@ -1399,14 +1586,17 @@ static void mpeg4_encode_vol_header(MpegEncContext * s)
         put_bits(&s->pb, 1, 0);                /* reduced res vop */
     }
     put_bits(&s->pb, 1, 0);            /* scalability */
-
+    
     ff_mpeg4_stuffing(&s->pb);
-    put_bits(&s->pb, 16, 0);
-    put_bits(&s->pb, 16, 0x1B2);       /* user_data */
-    sprintf(buf, "FFmpeg%sb%s", FFMPEG_VERSION, LIBAVCODEC_BUILD_STR);
-    put_string(&s->pb, buf);
 
-    ff_mpeg4_stuffing(&s->pb);
+    /* user data */
+    if(!ff_bit_exact){
+        put_bits(&s->pb, 16, 0);
+        put_bits(&s->pb, 16, 0x1B2);   /* user_data */
+        sprintf(buf, "FFmpeg%sb%s", FFMPEG_VERSION, LIBAVCODEC_BUILD_STR);
+        put_string(&s->pb, buf);
+        ff_mpeg4_stuffing(&s->pb);
+    }
 }
 
 /* write mpeg4 VOP header */
@@ -1421,6 +1611,8 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
             mpeg4_encode_vol_header(s);
     }
     
+    s->partitioned_frame= s->data_partitioning && s->pict_type!=B_TYPE;
+
 //printf("num:%d rate:%d base:%d\n", s->picture_number, s->frame_rate, FRAME_RATE_BASE);
     
     put_bits(&s->pb, 16, 0);           /* vop header */
@@ -1445,6 +1637,10 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
        put_bits(&s->pb, 1, s->no_rounding);    /* rounding type */
     }
     put_bits(&s->pb, 3, 0);    /* intra dc VLC threshold */
+    if(!s->progressive_sequence){
+         put_bits(&s->pb, 1, s->top_field_first);
+         put_bits(&s->pb, 1, s->alternate_scan);
+    }
     //FIXME sprite stuff
 
     put_bits(&s->pb, 5, s->qscale);
@@ -1461,13 +1657,23 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
      s->v_edge_pos= s->height;
 }
 
-static void h263_dc_scale(MpegEncContext * s)
+/**
+ * Add dquant to s->qscale, clamp the result to 1..31 and refresh the qscale dependent dc scales.
+ */
+static void change_qscale(MpegEncContext * s, int dquant)
 {
+    s->qscale += dquant;
+
+    if (s->qscale < 1)
+        s->qscale = 1;
+    else if (s->qscale > 31)
+        s->qscale = 31;
+
     s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
     s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
 }
 
-inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *dir_ptr)
+static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *dir_ptr)
 {
     int a, b, c, wrap, pred, scale;
     UINT16 *dc_val;
@@ -1489,6 +1695,16 @@ inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int
     b = dc_val[ - 1 - wrap];
     c = dc_val[ - wrap];
 
+    /* outside slice handling (we can't do that by memset as we need the dc for error resilience) */
+    if(s->first_slice_line && n!=3){
+        if(n!=2) b=c= 1024;
+        if(n!=1 && s->mb_x == s->resync_mb_x) b=a= 1024;
+    }
+    if(s->mb_x == s->resync_mb_x && s->mb_y == s->resync_mb_y+1){
+        if(n==0 || n==4 || n==5)
+            b=1024;
+    }
+
     if (abs(a - b) < abs(b - c)) {
        pred = c;
         *dir_ptr = 1; /* top */
@@ -1532,12 +1748,12 @@ void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n,
             if(s->mb_x==0 || s->qscale == s->qscale_table[xy] || n==1 || n==3){
                 /* same qscale */
                 for(i=1;i<8;i++) {
-                    block[block_permute_op(i*8)] += ac_val[i];
+                    block[s->idct_permutation[i<<3]] += ac_val[i];
                 }
             }else{
                 /* different qscale, we must rescale */
                 for(i=1;i<8;i++) {
-                    block[block_permute_op(i*8)] += ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale);
+                    block[s->idct_permutation[i<<3]] += ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale);
                 }
             }
         } else {
@@ -1548,23 +1764,23 @@ void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n,
             if(s->mb_y==0 || s->qscale == s->qscale_table[xy] || n==2 || n==3){
                 /* same qscale */
                 for(i=1;i<8;i++) {
-                    block[block_permute_op(i)] += ac_val[i + 8];
+                    block[s->idct_permutation[i]] += ac_val[i + 8];
                 }
             }else{
                 /* different qscale, we must rescale */
                 for(i=1;i<8;i++) {
-                    block[block_permute_op(i)] += ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale);
+                    block[s->idct_permutation[i]] += ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale);
                 }
             }
         }
     }
     /* left copy */
     for(i=1;i<8;i++)
-        ac_val1[i] = block[block_permute_op(i * 8)];
+        ac_val1[i    ] = block[s->idct_permutation[i<<3]];
 
     /* top copy */
     for(i=1;i<8;i++)
-        ac_val1[8 + i] = block[block_permute_op(i)];
+        ac_val1[8 + i] = block[s->idct_permutation[i   ]];
 
 }
 
@@ -1578,16 +1794,34 @@ static void mpeg4_inv_pred_ac(MpegEncContext * s, INT16 *block, int n,
     ac_val = s->ac_val[0][0] + s->block_index[n] * 16;
  
     if (dir == 0) {
+        const int xy= s->mb_x-1 + s->mb_y*s->mb_width;
         /* left prediction */
         ac_val -= 16;
-        for(i=1;i<8;i++) {
-            block[block_permute_op(i*8)] -= ac_val[i];
+        if(s->mb_x==0 || s->qscale == s->qscale_table[xy] || n==1 || n==3){
+            /* same qscale */
+            for(i=1;i<8;i++) {
+                block[s->idct_permutation[i<<3]] -= ac_val[i];
+            }
+        }else{
+            /* different qscale, we must rescale */
+            for(i=1;i<8;i++) {
+                block[s->idct_permutation[i<<3]] -= ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale);
+            }
         }
     } else {
+        const int xy= s->mb_x + s->mb_y*s->mb_width - s->mb_width;
         /* top prediction */
         ac_val -= 16 * s->block_wrap[n];
-        for(i=1;i<8;i++) {
-            block[block_permute_op(i)] -= ac_val[i + 8];
+        if(s->mb_y==0 || s->qscale == s->qscale_table[xy] || n==2 || n==3){
+            /* same qscale */
+            for(i=1;i<8;i++) {
+                block[s->idct_permutation[i]] -= ac_val[i + 8];
+            }
+        }else{
+            /* different qscale, we must rescale */
+            for(i=1;i<8;i++) {
+                block[s->idct_permutation[i]] -= ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale);
+            }
         }
     }
 }
@@ -1632,12 +1866,14 @@ static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n)
     }
 #endif
 }
-
+#ifdef CONFIG_ENCODERS
 static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n, int intra_dc, 
                                UINT8 *scan_table, PutBitContext *dc_pb, PutBitContext *ac_pb)
 {
-    int last, i, last_non_zero, sign;
-    int code;
+    int i, last_non_zero;
+#if 0 //variables for the commented-out version
+    int code, sign, last;
+#endif
     const RLTable *rl;
     UINT32 *bits_tab;
     UINT8 *len_tab;
@@ -1746,7 +1982,7 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
     }
 #endif
 }
-
+#endif
 
 
 /***********************************************/
@@ -1886,33 +2122,58 @@ void h263_decode_init_vlc(MpegEncContext *s)
                  &mb_type_b_tab[0][1], 2, 1,
                  &mb_type_b_tab[0][0], 2, 1);
     }
+}
 
-    s->progressive_sequence=1; // set to most likely for the case of incomplete headers
+/**
+ * Get the GOB height from the picture height: 1 up to 400, 2 up to 800, otherwise 4.
+ */
+int ff_h263_get_gob_height(MpegEncContext *s){
+    if (s->height <= 400)
+        return 1;
+    else if (s->height <= 800)
+        return  2;
+    else
+        return 4;
 }
 
-int h263_decode_gob_header(MpegEncContext *s)
+/**
+ * decodes the group of blocks header.
+ * @return <0 if an error occurred
+ */
+static int h263_decode_gob_header(MpegEncContext *s)
 {
     unsigned int val, gfid;
+    int left;
     
     /* Check for GOB Start Code */
     val = show_bits(&s->gb, 16);
-    if (val == 0) {
+    if(val)
+        return -1;
+
         /* We have a GBSC probably with GSTUFF */
-        skip_bits(&s->gb, 16); /* Drop the zeros */
-        while (get_bits1(&s->gb) == 0); /* Seek the '1' bit */
+    skip_bits(&s->gb, 16); /* Drop the zeros */
+    left= s->gb.size*8 - get_bits_count(&s->gb);
+    //MN: we must check the bits left or we might end in an infinite loop (or segfault)
+    for(;left>13; left--){
+        if(get_bits1(&s->gb)) break; /* Seek the '1' bit */
+    }
+    if(left<=13) 
+        return -1;
+
 #ifdef DEBUG
-        fprintf(stderr,"\nGOB Start Code at MB %d\n", (s->mb_y * s->mb_width) + s->mb_x);
+    fprintf(stderr,"\nGOB Start Code at MB %d\n", (s->mb_y * s->mb_width) + s->mb_x);
 #endif
-        s->gob_number = get_bits(&s->gb, 5); /* GN */
-        gfid = get_bits(&s->gb, 2); /* GFID */
-        s->qscale = get_bits(&s->gb, 5); /* GQUANT */
+    s->gob_number = get_bits(&s->gb, 5); /* GN */
+    gfid = get_bits(&s->gb, 2); /* GFID */
+    s->qscale = get_bits(&s->gb, 5); /* GQUANT */
+    if(s->qscale==0) 
+        return -1;
+    s->mb_x= 0;
+    s->mb_y= s->gob_index* s->gob_number;
 #ifdef DEBUG
-        fprintf(stderr, "\nGN: %u GFID: %u Quant: %u\n", s->gob_number, gfid, s->qscale);
+    fprintf(stderr, "\nGN: %u GFID: %u Quant: %u\n", s->gob_number, gfid, s->qscale);
 #endif
-        return 1;
-    }
     return 0;
-            
 }
 
 static inline void memsetw(short *tab, int val, int n)
@@ -1940,7 +2201,7 @@ void ff_mpeg4_merge_partitions(MpegEncContext *s)
         s->i_tex_bits+= tex_pb_len;
     }else{
         put_bits(&s->pb, 17, MOTION_MARKER);
-        s->misc_bits+=17 + pb2_len;;
+        s->misc_bits+=17 + pb2_len;
         s->mv_bits+= bits - s->last_bits;
         s->p_tex_bits+= tex_pb_len;
     }
@@ -1953,131 +2214,149 @@ void ff_mpeg4_merge_partitions(MpegEncContext *s)
     s->last_bits= get_bit_count(&s->pb);
 }
 
+int ff_mpeg4_get_video_packet_prefix_length(MpegEncContext *s){
+    switch(s->pict_type){
+        case I_TYPE:
+            return 16;
+        case P_TYPE:
+        case S_TYPE:
+            return s->f_code+15;
+        case B_TYPE:
+            return FFMAX(FFMAX(s->f_code, s->b_code)+15, 17);
+        default:
+            return -1;
+    }
+}
+
 void ff_mpeg4_encode_video_packet_header(MpegEncContext *s)
 {
     int mb_num_bits= av_log2(s->mb_num - 1) + 1;
 
     ff_mpeg4_stuffing(&s->pb);
-    if(s->pict_type==I_TYPE)
-        put_bits(&s->pb, 16, 0);
-    else if(s->pict_type==B_TYPE)
-        put_bits(&s->pb, MAX(MAX(s->f_code, s->b_code)+15, 17), 0);
-    else /* S/P_TYPE */
-        put_bits(&s->pb, s->f_code+15, 0);
+    put_bits(&s->pb, ff_mpeg4_get_video_packet_prefix_length(s), 0);
     put_bits(&s->pb, 1, 1);
     
     put_bits(&s->pb, mb_num_bits, s->mb_x + s->mb_y*s->mb_width);
-    put_bits(&s->pb, 5, s->qscale);
+    put_bits(&s->pb, s->quant_precision, s->qscale);
     put_bits(&s->pb, 1, 0); /* no HEC */
 }
 
 /**
- * decodes the next video packet and sets s->next_qscale 
- * returns mb_num of the next packet or <0 if something went wrong
+ * check if a resync marker or the end follows; bits consumed while probing are restored from a copy of s->gb.
+ * @return 1 if a resync marker or the end follows, 0 if not
+ */
+static inline int mpeg4_is_resync(MpegEncContext *s){
+    const int bits_count= get_bits_count(&s->gb);
+    
+    if(s->workaround_bugs&FF_BUG_NO_PADDING){
+        return 0;
+    }
+
+    if(bits_count + 8 >= s->gb.size*8){
+        int v= show_bits(&s->gb, 8);
+        v|= 0x7F >> (7-(bits_count&7));
+                
+        if(v==0x7F)
+            return 1;
+    }else{
+        if(show_bits(&s->gb, 16) == ff_mpeg4_resync_prefix[bits_count&7]){
+            int len;
+            GetBitContext gb= s->gb;
+        
+            skip_bits(&s->gb, 1);
+            align_get_bits(&s->gb);
+        
+            for(len=0; len<32; len++){
+                if(get_bits1(&s->gb)) break;
+            }
+
+            s->gb= gb;
+
+            if(len>=ff_mpeg4_get_video_packet_prefix_length(s))
+                return 1;
+        }
+    }
+    return 0;
+}
+
+/**
+ * decodes the next video packet.
+ * @return <0 if something went wrong
  */
-static int decode_video_packet_header(MpegEncContext *s, GetBitContext *gb)
+static int mpeg4_decode_video_packet_header(MpegEncContext *s)
 {
-    int bits;
     int mb_num_bits= av_log2(s->mb_num - 1) + 1;
-    int header_extension=0, mb_num;
-//printf("%X\n", show_bits(&gb, 24));
-//printf("parse_video_packet_header\n");
-//    if(show_aligned_bits(gb, 1, 16) != 0) return -1;
+    int header_extension=0, mb_num, len;
     
     /* is there enough space left for a video packet + header */
-    if( get_bits_count(gb) > gb->size*8-20) return -1;
+    if( get_bits_count(&s->gb) > s->gb.size*8-20) return -1;
 
-//printf("resync at %d %d\n", s->mb_x, s->mb_y);
-//    skip_bits(gb, 1);
-//    align_get_bits(gb);
-    if(get_bits(gb, 16)!=0){
-        printf("internal error while decoding video packet header\n");
+    for(len=0; len<32; len++){
+        if(get_bits1(&s->gb)) break;
     }
 
-//printf("%X\n", show_bits(gb, 24));
-    bits=0;
-    while(!get_bits1(gb) && bits<30) bits++;
-    if((s->pict_type == P_TYPE || s->pict_type == S_TYPE) && bits != s->f_code-1){
-        printf("marker does not match f_code (is: %d should be: %d pos: %d end %d x: %d y: %d)\n", 
-               bits+1, s->f_code, get_bits_count(gb), gb->size*8, s->mb_x, s->mb_y);
-        return -1;
-    }else if(s->pict_type == I_TYPE && bits != 0){
-        printf("marker too long\n");
-        return -1;
-    }else if(s->pict_type == B_TYPE && bits != MAX(MAX(s->f_code, s->b_code)-1, 1)){
-        printf("marker does not match f/b_code\n");
+    if(len!=ff_mpeg4_get_video_packet_prefix_length(s)){
+        printf("marker does not match f_code\n");
         return -1;
     }
-//printf("%X\n", show_bits(gb, 24));
-
+    
     if(s->shape != RECT_SHAPE){
-        header_extension= get_bits1(gb);
+        header_extension= get_bits1(&s->gb);
         //FIXME more stuff here
     }
 
-    mb_num= get_bits(gb, mb_num_bits);
-    if(mb_num < s->mb_x + s->mb_y*s->mb_width || mb_num>=s->mb_num){
-        fprintf(stderr, "illegal mb_num in video packet (%d %d) \n", mb_num, s->mb_x + s->mb_y*s->mb_width);
+    mb_num= get_bits(&s->gb, mb_num_bits);
+    if(mb_num>=s->mb_num){
+        fprintf(stderr, "illegal mb_num in video packet (%d %d) \n", mb_num, s->mb_num);
         return -1;
     }
+    s->mb_x= mb_num % s->mb_width;
+    s->mb_y= mb_num / s->mb_width;
 
     if(s->shape != BIN_ONLY_SHAPE){
-        s->next_resync_qscale= get_bits(gb, 5);
-        if(s->next_resync_qscale==0)
-            s->next_resync_qscale= s->qscale;
-        if(s->next_resync_qscale==0){
-            fprintf(stderr, "qscale==0\n");
-            return -1;
-        }
+        int qscale= get_bits(&s->gb, s->quant_precision); 
+        if(qscale)
+            s->qscale= qscale;
     }
 
     if(s->shape == RECT_SHAPE){
-        header_extension= get_bits1(gb);
+        header_extension= get_bits1(&s->gb);
     }
     if(header_extension){
         int time_increment;
         int time_incr=0;
-        printf("header extension not supported\n");
-        return -1;
 
-        while (get_bits1(gb) != 0) 
+        while (get_bits1(&s->gb) != 0) 
             time_incr++;
 
-        check_marker(gb, "before time_increment in video packed header");
-        time_increment= get_bits(gb, s->time_increment_bits);
-        if(s->pict_type!=B_TYPE){
-            s->last_time_base= s->time_base;
-            s->time_base+= time_incr;
-            s->time= s->time_base*s->time_increment_resolution + time_increment;
-            s->pp_time= s->time - s->last_non_b_time;
-            s->last_non_b_time= s->time;
-        }else{
-            s->time= (s->last_time_base + time_incr)*s->time_increment_resolution + time_increment;
-            s->bp_time= s->last_non_b_time - s->time;
-        }
-        check_marker(gb, "before vop_coding_type in video packed header");
+        check_marker(&s->gb, "before time_increment in video packed header");
+        time_increment= get_bits(&s->gb, s->time_increment_bits);
+        check_marker(&s->gb, "before vop_coding_type in video packed header");
         
-        skip_bits(gb, 2); /* vop coding type */
+        skip_bits(&s->gb, 2); /* vop coding type */
         //FIXME not rect stuff here
 
         if(s->shape != BIN_ONLY_SHAPE){
-            skip_bits(gb, 3); /* intra dc vlc threshold */
-
-            if(s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE && s->num_sprite_warping_points){
+            skip_bits(&s->gb, 3); /* intra dc vlc threshold */
+//FIXME don't just ignore everything
+            if(s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE){
                 mpeg4_decode_sprite_trajectory(s);
+                fprintf(stderr, "untested\n");
             }
 
             //FIXME reduced res stuff here
             
             if (s->pict_type != I_TYPE) {
-                s->f_code = get_bits(gb, 3);   /* fcode_for */
-                if(s->f_code==0){
-                    printf("Error, video packet header damaged or not MPEG4 header (f_code=0)\n");
-                    return -1; // makes no sense to continue, as the MV decoding will break very quickly
+                int f_code = get_bits(&s->gb, 3);      /* fcode_for */
+                if(f_code==0){
+                    printf("Error, video packet header damaged (f_code=0)\n");
                 }
             }
             if (s->pict_type == B_TYPE) {
-                s->b_code = get_bits(gb, 3);
+                int b_code = get_bits(&s->gb, 3);
+                if(b_code==0){
+                    printf("Error, video packet header damaged (b_code=0)\n");
+                }
             }       
         }
     }
@@ -2085,7 +2364,7 @@ static int decode_video_packet_header(MpegEncContext *s, GetBitContext *gb)
     
 //printf("parse ok %d %d %d %d\n", mb_num, s->mb_x + s->mb_y*s->mb_width, get_bits_count(gb), get_bits_count(&s->gb));
 
-    return mb_num;
+    return 0;
 }
 
 void ff_mpeg4_clean_buffers(MpegEncContext *s)
@@ -2097,10 +2376,12 @@ void ff_mpeg4_clean_buffers(MpegEncContext *s)
     c_wrap= s->block_wrap[4];
     c_xy= s->mb_y*c_wrap + s->mb_x;
 
+#if 0
     /* clean DC */
     memsetw(s->dc_val[0] + l_xy, 1024, l_wrap*2+1);
     memsetw(s->dc_val[1] + c_xy, 1024, c_wrap+1);
     memsetw(s->dc_val[2] + c_xy, 1024, c_wrap+1);
+#endif
 
     /* clean AC */
     memset(s->ac_val[0] + l_xy, 0, (l_wrap*2+1)*16*sizeof(INT16));
@@ -2117,84 +2398,113 @@ void ff_mpeg4_clean_buffers(MpegEncContext *s)
     s->last_mv[1][0][1]= 0;
 }
 
-/* searches for the next resync marker clears ac,dc,mc, and sets s->next_resync_gb, s->mb_num_left */
-int ff_mpeg4_resync(MpegEncContext *s)
-{
-    GetBitContext gb;
+/**
+ * decodes the group of blocks / video packet header.
+ * @return <0 if no resync found
+ */
+int ff_h263_resync(MpegEncContext *s){
+    int left, ret;
     
-    /* search & parse next resync marker */
+    if(s->codec_id==CODEC_ID_MPEG4)
+        skip_bits1(&s->gb);
     
-    gb= s->next_resync_gb;
-    align_get_bits(&gb);
-//printf("mpeg4_resync %d next:%d \n", get_bits_count(&gb), get_bits_count(&s->next_resync_gb));
-    for(;;) {
-        int v= show_bits(&gb, 24);
-        if( get_bits_count(&gb) >= gb.size*8-24 || v == 1 /* start-code */){
-            s->mb_num_left= s->mb_num - s->mb_x - s->mb_y*s->mb_width;
-//printf("mpeg4_resync end\n");
-            s->gb= s->next_resync_gb; //continue at the next resync marker
-            return -1;
-        }else if(v>>8 == 0){
-            int next;
-            s->next_resync_pos= get_bits_count(&gb);
-            
-            next= decode_video_packet_header(s, &gb);
-            if(next >= 0){
-                s->mb_num_left= next - s->mb_x - s->mb_y*s->mb_width;
-                break;
-            }
+    align_get_bits(&s->gb);
+
+    if(show_bits(&s->gb, 16)==0){
+        if(s->codec_id==CODEC_ID_MPEG4)
+            ret= mpeg4_decode_video_packet_header(s);
+        else
+            ret= h263_decode_gob_header(s);
+        if(ret>=0)
+            return 0;
+    }
+    //ok, it's not where it's supposed to be ...
+    s->gb= s->last_resync_gb;
+    align_get_bits(&s->gb);
+    left= s->gb.size*8 - get_bits_count(&s->gb);
+    
+    for(;left>16+1+5+5; left-=8){ 
+        if(show_bits(&s->gb, 16)==0){
+            GetBitContext bak= s->gb;
 
-            align_get_bits(&gb);
+            if(s->codec_id==CODEC_ID_MPEG4)
+                ret= mpeg4_decode_video_packet_header(s);
+            else
+                ret= h263_decode_gob_header(s);
+            if(ret>=0)
+                return 0;
+
+            s->gb= bak;
         }
-        skip_bits(&gb, 8);
+        skip_bits(&s->gb, 8);
     }
-    s->next_resync_gb=gb;
     
-    return 0;
+    return -1;
 }
 
-static inline void init_block_index(MpegEncContext *s)
-{
-    s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
-    s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1)     + s->mb_x*2;
-    s->block_index[2]= s->block_wrap[0]*(s->mb_y*2 + 2) - 1 + s->mb_x*2;
-    s->block_index[3]= s->block_wrap[0]*(s->mb_y*2 + 2)     + s->mb_x*2;
-    s->block_index[4]= s->block_wrap[4]*(s->mb_y + 1)                    + s->block_wrap[0]*(s->mb_height*2 + 2) + s->mb_x;
-    s->block_index[5]= s->block_wrap[4]*(s->mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2) + s->mb_x;
-}
+/**
+ * @param n either 0 for the x component or 1 for y
+ * @returns the average MV for a GMC MB
+ */
+static inline int get_amv(MpegEncContext *s, int n){
+    int x, y, mb_v, sum, dx, dy, shift;
+    int len = 1 << (s->f_code + 4);
+    const int a= s->sprite_warping_accuracy;
+
+    if(s->real_sprite_warping_points==1){
+        if(s->divx_version==500 && s->divx_build==413)
+            sum= s->sprite_offset[0][n] / (1<<(a - s->quarter_sample));
+        else
+            sum= RSHIFT(s->sprite_offset[0][n]<<s->quarter_sample, a);
+    }else{
+        dx= s->sprite_delta[n][0];
+        dy= s->sprite_delta[n][1];
+        shift= s->sprite_shift[0];
+        if(n) dy -= 1<<(shift + a + 1);
+        else  dx -= 1<<(shift + a + 1);
+        mb_v= s->sprite_offset[0][n] + dx*s->mb_x*16 + dy*s->mb_y*16;
+
+        sum=0;
+        for(y=0; y<16; y++){
+            int v;
+        
+            v= mb_v + dy*y;
+            //XXX FIXME optimize
+            for(x=0; x<16; x++){
+                sum+= v>>shift;
+                v+= dx;
+            }
+        }
+        sum /= 256;
+        sum= RSHIFT(sum<<s->quarter_sample, a);
+    }
 
-static inline void update_block_index(MpegEncContext *s)
-{
-    s->block_index[0]+=2;
-    s->block_index[1]+=2;
-    s->block_index[2]+=2;
-    s->block_index[3]+=2;
-    s->block_index[4]++;
-    s->block_index[5]++;
+    if      (sum < -len) sum= -len;
+    else if (sum >= len) sum= len-1;
+
+    return sum;
 }
 
 /**
- * decodes the first & second partition
- * returns error type or 0 if no error
+ * decodes first partition.
+ * @return number of MBs decoded or <0 if an error occurred
  */
-int ff_mpeg4_decode_partitions(MpegEncContext *s)
-{
-    static const INT8 quant_tab[4] = { -1, -2, 1, 2 };
+static int mpeg4_decode_partition_a(MpegEncContext *s){
     int mb_num;
+    static const INT8 quant_tab[4] = { -1, -2, 1, 2 };
     
     /* decode first partition */
     mb_num=0;
     s->first_slice_line=1;
-    s->mb_x= s->resync_mb_x;
-    for(s->mb_y= s->resync_mb_y; mb_num < s->mb_num_left; s->mb_y++){
-        init_block_index(s);
-        for(; mb_num < s->mb_num_left && s->mb_x<s->mb_width; s->mb_x++){
+    for(; s->mb_y<s->mb_height; s->mb_y++){
+        ff_init_block_index(s);
+        for(; s->mb_x<s->mb_width; s->mb_x++){
             const int xy= s->mb_x + s->mb_y*s->mb_width;
             int cbpc;
             int dir=0;
             
             mb_num++;
-            update_block_index(s);
+            ff_update_block_index(s);
             if(s->mb_x == s->resync_mb_x && s->mb_y == s->resync_mb_y+1)
                 s->first_slice_line=0;
             
@@ -2203,23 +2513,23 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s)
             if(s->pict_type==I_TYPE){
                 int i;
 
+                if(show_bits(&s->gb, 19)==DC_MARKER){
+                    return mb_num-1;
+                }
+
                 PRINT_MB_TYPE("I");
                 cbpc = get_vlc2(&s->gb, intra_MCBPC_vlc.table, INTRA_MCBPC_VLC_BITS, 1);
                 if (cbpc < 0){
+
                     fprintf(stderr, "cbpc corrupted at %d %d\n", s->mb_x, s->mb_y);
-                    return DECODING_DESYNC;
+                    return -1;
                 }
                 s->cbp_table[xy]= cbpc & 3;
                 s->mb_type[xy]= MB_TYPE_INTRA;
                 s->mb_intra = 1;
 
                 if(cbpc & 4) {
-                    s->qscale += quant_tab[get_bits(&s->gb, 2)];
-                    if (s->qscale < 1)
-                        s->qscale = 1;
-                    else if (s->qscale > 31)
-                        s->qscale = 31;
-                    h263_dc_scale(s);
+                    change_qscale(s, quant_tab[get_bits(&s->gb, 2)]);
                 }
                 s->qscale_table[xy]= s->qscale;
 
@@ -2229,35 +2539,34 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s)
                     int dc= mpeg4_decode_dc(s, i, &dc_pred_dir); 
                     if(dc < 0){
                         fprintf(stderr, "DC corrupted at %d %d\n", s->mb_x, s->mb_y);
-                        return DECODING_DESYNC;
+                        return -1;
                     }
                     dir<<=1;
                     if(dc_pred_dir) dir|=1;
                 }
                 s->pred_dir_table[xy]= dir;
+                
+                s->error_status_table[xy]= AC_ERROR;
             }else{ /* P/S_TYPE */
-                int mx, my, pred_x, pred_y;
+                int mx, my, pred_x, pred_y, bits;
                 INT16 * const mot_val= s->motion_val[s->block_index[0]];
                 const int stride= s->block_wrap[0]*2;
 
-                if(get_bits1(&s->gb)){
+                bits= show_bits(&s->gb, 17);
+                if(bits==MOTION_MARKER){
+                    return mb_num-1;
+                }
+                skip_bits1(&s->gb);
+                if(bits&0x10000){
                     /* skip mb */
                     s->mb_type[xy]= MB_TYPE_SKIPED;
                     if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE){
-                        const int a= s->sprite_warping_accuracy;
                         PRINT_MB_TYPE("G");
-                        if(s->divx_version==500 && s->divx_build==413){
-                            mx = s->sprite_offset[0][0] / (1<<(a-s->quarter_sample));
-                            my = s->sprite_offset[0][1] / (1<<(a-s->quarter_sample));
-                        }else{
-                            mx = RSHIFT(s->sprite_offset[0][0], a-s->quarter_sample);
-                            my = RSHIFT(s->sprite_offset[0][1], a-s->quarter_sample);
-                            s->mb_type[xy]= MB_TYPE_GMC | MB_TYPE_SKIPED;
-                        }
+                        mx= get_amv(s, 0);
+                        my= get_amv(s, 1);
                     }else{
                         PRINT_MB_TYPE("S");
-                        mx = 0;
-                        my = 0;
+                        mx=my=0;
                     }
                     mot_val[0       ]= mot_val[2       ]=
                     mot_val[0+stride]= mot_val[2+stride]= mx;
@@ -2267,12 +2576,13 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s)
                     if(s->mbintra_table[xy])
                         ff_clean_intra_table_entries(s);
 
+                    s->error_status_table[xy]= AC_ERROR;
                     continue;
                 }
                 cbpc = get_vlc2(&s->gb, inter_MCBPC_vlc.table, INTER_MCBPC_VLC_BITS, 2);
                 if (cbpc < 0){
                     fprintf(stderr, "cbpc corrupted at %d %d\n", s->mb_x, s->mb_y);
-                    return DECODING_DESYNC;
+                    return -1;
                 }
                 if (cbpc > 20)
                     cbpc+=3;
@@ -2290,6 +2600,7 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s)
                     mot_val[0+stride]= mot_val[2+stride]= 0;
                     mot_val[1       ]= mot_val[3       ]=
                     mot_val[1+stride]= mot_val[3+stride]= 0;
+                    s->error_status_table[xy]= DC_ERROR|AC_ERROR;
                 }else{
                     if(s->mbintra_table[xy])
                         ff_clean_intra_table_entries(s);
@@ -2304,31 +2615,19 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s)
                         s->mb_type[xy]= MB_TYPE_INTER;
 
                         h263_pred_motion(s, 0, &pred_x, &pred_y);
-                        if(!s->mcsel)
-                           mx = h263_decode_motion(s, pred_x, s->f_code);
-                        else {
-                            const int a= s->sprite_warping_accuracy;
-                            if(s->divx_version==500 && s->divx_build==413){
-                                mx = s->sprite_offset[0][0] / (1<<(a-s->quarter_sample));
-                            }else{
-                                mx = RSHIFT(s->sprite_offset[0][0], a-s->quarter_sample);
-                            }
-                        }
-                        if (mx >= 0xffff)
-                            return DECODING_DESYNC;
-            
-                        if(!s->mcsel)
-                           my = h263_decode_motion(s, pred_y, s->f_code);
-                        else{
-                           const int a= s->sprite_warping_accuracy;
-                            if(s->divx_version==500 && s->divx_build==413){
-                                my = s->sprite_offset[0][1] / (1<<(a-s->quarter_sample));
-                            }else{
-                                my = RSHIFT(s->sprite_offset[0][1], a-s->quarter_sample);
-                            }
+                        if(!s->mcsel){
+                            mx = h263_decode_motion(s, pred_x, s->f_code);
+                            if (mx >= 0xffff)
+                                return -1;
+
+                            my = h263_decode_motion(s, pred_y, s->f_code);
+                            if (my >= 0xffff)
+                                return -1;
+                        } else {
+                            mx = get_amv(s, 0);
+                            my = get_amv(s, 1);
                         }
-                        if (my >= 0xffff)
-                            return DECODING_DESYNC;
+
                         mot_val[0       ]= mot_val[2       ] =
                         mot_val[0+stride]= mot_val[2+stride]= mx;
                         mot_val[1       ]= mot_val[3       ]=
@@ -2341,45 +2640,51 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s)
                             INT16 *mot_val= h263_pred_motion(s, i, &pred_x, &pred_y);
                             mx = h263_decode_motion(s, pred_x, s->f_code);
                             if (mx >= 0xffff)
-                                return DECODING_DESYNC;
+                                return -1;
                 
                             my = h263_decode_motion(s, pred_y, s->f_code);
                             if (my >= 0xffff)
-                                return DECODING_DESYNC;
+                                return -1;
                             mot_val[0] = mx;
                             mot_val[1] = my;
                         }
                     }
+                    s->error_status_table[xy]= AC_ERROR;
                 }
             }
         }
         s->mb_x= 0;
     }
 
-    if     (s->pict_type==I_TYPE && get_bits(&s->gb, 19)!=DC_MARKER    ) s->decoding_error= DECODING_DESYNC;
-    else if(s->pict_type!=I_TYPE && get_bits(&s->gb, 17)!=MOTION_MARKER) s->decoding_error= DECODING_DESYNC;
-    if(s->decoding_error== DECODING_DESYNC){
-        fprintf(stderr, "marker missing after first partition at %d %d\n", s->mb_x, s->mb_y);
-        return DECODING_DESYNC;
-    }
+    return mb_num;
+}
+
+/**
+ * decode second partition.
+ * @return <0 if an error occurred
+ */
+static int mpeg4_decode_partition_b(MpegEncContext *s, int mb_count){
+    int mb_num=0;
+    static const INT8 quant_tab[4] = { -1, -2, 1, 2 };
 
-    /* decode second partition */
-    mb_num=0;
     s->mb_x= s->resync_mb_x;
-    for(s->mb_y= s->resync_mb_y; mb_num < s->mb_num_left; s->mb_y++){
-        init_block_index(s);
-        for(; mb_num < s->mb_num_left && s->mb_x<s->mb_width; s->mb_x++){
+    s->first_slice_line=1;
+    for(s->mb_y= s->resync_mb_y; mb_num < mb_count; s->mb_y++){
+        ff_init_block_index(s);
+        for(; mb_num < mb_count && s->mb_x<s->mb_width; s->mb_x++){
             const int xy= s->mb_x + s->mb_y*s->mb_width;
 
             mb_num++;
-            update_block_index(s);
+            ff_update_block_index(s);
+            if(s->mb_x == s->resync_mb_x && s->mb_y == s->resync_mb_y+1)
+                s->first_slice_line=0;
             
             if(s->pict_type==I_TYPE){
                 int ac_pred= get_bits1(&s->gb);
                 int cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
                 if(cbpy<0){
                     fprintf(stderr, "cbpy corrupted at %d %d\n", s->mb_x, s->mb_y);
-                    return DECODING_AC_LOST;
+                    return -1;
                 }
                 
                 s->cbp_table[xy]|= cbpy<<2;
@@ -2392,16 +2697,11 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s)
 
                     if(cbpy<0){
                         fprintf(stderr, "I cbpy corrupted at %d %d\n", s->mb_x, s->mb_y);
-                        return DECODING_ACDC_LOST;
+                        return -1;
                     }
                     
                     if(s->cbp_table[xy] & 8) {
-                        s->qscale += quant_tab[get_bits(&s->gb, 2)];
-                        if (s->qscale < 1)
-                            s->qscale = 1;
-                        else if (s->qscale > 31)
-                            s->qscale = 31;
-                        h263_dc_scale(s);
+                        change_qscale(s, quant_tab[get_bits(&s->gb, 2)]);
                     }
                     s->qscale_table[xy]= s->qscale;
 
@@ -2410,7 +2710,7 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s)
                         int dc= mpeg4_decode_dc(s, i, &dc_pred_dir); 
                         if(dc < 0){
                             fprintf(stderr, "DC corrupted at %d %d\n", s->mb_x, s->mb_y);
-                            return DECODING_ACDC_LOST;
+                            return -1;
                         }
                         dir<<=1;
                         if(dc_pred_dir) dir|=1;
@@ -2418,6 +2718,7 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s)
                     s->cbp_table[xy]&= 3; //remove dquant
                     s->cbp_table[xy]|= cbpy<<2;
                     s->pred_dir_table[xy]= dir | (ac_pred<<7);
+                    s->error_status_table[xy]&= ~DC_ERROR;
                 }else if(s->mb_type[xy]&MB_TYPE_SKIPED){
                     s->qscale_table[xy]= s->qscale;
                     s->cbp_table[xy]= 0;
@@ -2426,17 +2727,11 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s)
 
                     if(cbpy<0){
                         fprintf(stderr, "P cbpy corrupted at %d %d\n", s->mb_x, s->mb_y);
-                        return DECODING_ACDC_LOST;
+                        return -1;
                     }
                     
                     if(s->cbp_table[xy] & 8) {
-//fprintf(stderr, "dquant\n");
-                        s->qscale += quant_tab[get_bits(&s->gb, 2)];
-                        if (s->qscale < 1)
-                            s->qscale = 1;
-                        else if (s->qscale > 31)
-                            s->qscale = 31;
-                        h263_dc_scale(s);
+                        change_qscale(s, quant_tab[get_bits(&s->gb, 2)]);
                     }
                     s->qscale_table[xy]= s->qscale;
 
@@ -2445,49 +2740,72 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s)
                 }
             }
         }
+        if(mb_num >= mb_count) return 0;
         s->mb_x= 0;
     }
+    return 0;
+}
+
+/**
+ * decodes the first & second partition
+ * @return <0 if error (and sets error type in the error_status_table)
+ */
+int ff_mpeg4_decode_partitions(MpegEncContext *s)
+{
+    int mb_num;
+    
+    mb_num= mpeg4_decode_partition_a(s);    
+    if(mb_num<0)
+        return -1;
+    
+    if(s->resync_mb_x + s->resync_mb_y*s->mb_width + mb_num > s->mb_num){
+        fprintf(stderr, "slice below monitor ...\n");
+        return -1;
+    }
+
+    s->mb_num_left= mb_num;
+        
+    if(s->pict_type==I_TYPE){
+        if(get_bits(&s->gb, 19)!=DC_MARKER){
+            fprintf(stderr, "marker missing after first I partition at %d %d\n", s->mb_x, s->mb_y);
+            return -1;
+        }else
+            s->error_status_table[s->mb_x + s->mb_y*s->mb_width-1]|= MV_END|DC_END;
+    }else{
+        if(get_bits(&s->gb, 17)!=MOTION_MARKER){
+            fprintf(stderr, "marker missing after first P partition at %d %d\n", s->mb_x, s->mb_y);
+            return -1;
+        }else
+            s->error_status_table[s->mb_x + s->mb_y*s->mb_width-1]|= MV_END;
+    }
     
+    if( mpeg4_decode_partition_b(s, mb_num) < 0){
+        return -1;
+    }
+    
+    s->error_status_table[s->mb_x + s->mb_y*s->mb_width-1]|= DC_END;
 
     return 0;        
 }
 
-static int mpeg4_decode_partitioned_mb(MpegEncContext *s,
-                   DCTELEM block[6][64])
+/**
+ * decode partition C of one MB.
+ * @return <0 if an error occurred
+ */
+static int mpeg4_decode_partitioned_mb(MpegEncContext *s, DCTELEM block[6][64])
 {
     int cbp, mb_type;
     const int xy= s->mb_x + s->mb_y*s->mb_width;
 
-    if(s->mb_x==s->resync_mb_x && s->mb_y==s->resync_mb_y){ //Note resync_mb_{x,y}==0 at the start
-        int i;
-        int block_index_backup[6];
-        int qscale= s->qscale;
-        
-        for(i=0; i<6; i++) block_index_backup[i]= s->block_index[i];
-        
-        s->decoding_error= ff_mpeg4_decode_partitions(s);
-        
-        for(i=0; i<6; i++) s->block_index[i]= block_index_backup[i];
-        s->first_slice_line=1;
-        s->mb_x= s->resync_mb_x;
-        s->mb_y= s->resync_mb_y;
-        s->qscale= qscale;
-        h263_dc_scale(s);
-
-        if(s->decoding_error==DECODING_DESYNC) return -1;
-    }
-    
     mb_type= s->mb_type[xy];
-    if(s->decoding_error)
-        cbp=0;
-    else 
-        cbp = s->cbp_table[xy];
+    cbp = s->cbp_table[xy];
 
-    if(s->decoding_error!=DECODING_ACDC_LOST && s->qscale_table[xy] != s->qscale){
+    if(s->qscale_table[xy] != s->qscale){
         s->qscale= s->qscale_table[xy];
-        h263_dc_scale(s);
+        s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
+        s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
     }
-
+    
     if (s->pict_type == P_TYPE || s->pict_type==S_TYPE) {
         int i;
         for(i=0; i<4; i++){
@@ -2509,22 +2827,16 @@ static int mpeg4_decode_partitioned_mb(MpegEncContext *s,
                 s->mcsel=0;
                 s->mb_skiped = 1;
             }
-            return 0;
-        }else if(s->mb_intra && s->decoding_error!=DECODING_ACDC_LOST){
+        }else if(s->mb_intra){
             s->ac_pred = s->pred_dir_table[xy]>>7;
 
             /* decode each block */
             for (i = 0; i < 6; i++) {
-                int ret= mpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, 1);
-                if(ret==DECODING_AC_LOST){
-                    fprintf(stderr, "texture corrupted at %d %d (trying to continue with mc/dc only)\n", s->mb_x, s->mb_y);
-                    s->decoding_error=DECODING_AC_LOST;
-                    cbp=0;
-                }else if(ret==DECODING_ACDC_LOST){
-                    fprintf(stderr, "dc corrupted at %d %d (trying to continue with mc only)\n", s->mb_x, s->mb_y);
-                    s->decoding_error=DECODING_ACDC_LOST;
-                    break;
+                if(mpeg4_decode_block(s, block[i], i, cbp&32, 1) < 0){
+                    fprintf(stderr, "texture corrupted at %d %d\n", s->mb_x, s->mb_y);
+                    return -1;
                 }
+                cbp+=cbp;
             }
         }else if(!s->mb_intra){
 //            s->mcsel= 0; //FIXME do we need to init that
@@ -2535,16 +2847,13 @@ static int mpeg4_decode_partitioned_mb(MpegEncContext *s,
             } else {
                 s->mv_type = MV_TYPE_16X16;
             }
-            if(s->decoding_error==0 && cbp){
-                /* decode each block */
-                for (i = 0; i < 6; i++) {
-                    int ret= mpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, 0);
-                    if(ret==DECODING_AC_LOST){
-                        fprintf(stderr, "texture corrupted at %d %d (trying to continue with mc/dc only)\n", s->mb_x, s->mb_y);
-                        s->decoding_error=DECODING_AC_LOST;
-                        break;
-                    }
+            /* decode each block */
+            for (i = 0; i < 6; i++) {
+                if(mpeg4_decode_block(s, block[i], i, cbp&32, 0) < 0){
+                    fprintf(stderr, "texture corrupted at %d %d (trying to continue with mc/dc only)\n", s->mb_x, s->mb_y);
+                    return -1;
                 }
+                cbp+=cbp;
             }
         }
     } else { /* I-Frame */
@@ -2554,69 +2863,42 @@ static int mpeg4_decode_partitioned_mb(MpegEncContext *s,
         
         /* decode each block */
         for (i = 0; i < 6; i++) {
-            int ret= mpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, 1);
-            if(ret==DECODING_AC_LOST){
+            if(mpeg4_decode_block(s, block[i], i, cbp&32, 1) < 0){
                 fprintf(stderr, "texture corrupted at %d %d (trying to continue with dc only)\n", s->mb_x, s->mb_y);
-                s->decoding_error=DECODING_AC_LOST;
-                cbp=0;
-            }else if(ret==DECODING_ACDC_LOST){
-                fprintf(stderr, "dc corrupted at %d %d\n", s->mb_x, s->mb_y);
                 return -1;
             }
+            cbp+=cbp;
         }
     }
 
-    return 0;
-}
-#if 0
-static inline void decode_interlaced_info(MpegEncContext *s, int cbp, int mb_type){
-    s->mv_type= 0;            
-    if(!s->progressive_sequence){
-        if(cbp || s->mb_intra)
-            s->interlaced_dct= get_bits1(&s->gb);
-        
-        if(!s->mb_intra){
-            if(   s->pict_type==P_TYPE //FIXME check that 4MV is forbidden
-               || (s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE && !s->mcsel)
-               || (s->pict_type==B_TYPE && mb_type!=0) ){
+    s->error_status_table[xy]&= ~AC_ERROR;
 
-                if(get_bits1(&s->gb)){
-                    s->mv_type= MV_TYPE_FIELD;
+    /* per-MB end of slice check */
 
-                    if(   s->pict_type==P_TYPE
-                       || (s->pict_type==B_TYPE && mb_type!=2)){
-                        s->field_select[0][0]= get_bits1(&s->gb);
-                        s->field_select[0][1]= get_bits1(&s->gb);
-                    }
-                    if(s->pict_type==B_TYPE && mb_type!=3){
-                        s->field_select[1][0]= get_bits1(&s->gb);
-                        s->field_select[1][1]= get_bits1(&s->gb);
-                    }
-                }else
-                    s->mv_type= 0;            
-            }
-        }   
+    if(--s->mb_num_left <= 0){
+//printf("%06X %d\n", show_bits(&s->gb, 24), s->gb.size*8 - get_bits_count(&s->gb));
+        if(mpeg4_is_resync(s))
+            return SLICE_END;
+        else
+            return SLICE_NOEND;     
+    }else{
+        if(s->cbp_table[xy+1] && mpeg4_is_resync(s))
+            return SLICE_END;
+        else
+            return SLICE_OK;
     }
 }
-#endif
 
-int h263_decode_mb(MpegEncContext *s,
-                   DCTELEM block[6][64])
+int ff_h263_decode_mb(MpegEncContext *s,
+                      DCTELEM block[6][64])
 {
     int cbpc, cbpy, i, cbp, pred_x, pred_y, mx, my, dquant;
     INT16 *mot_val;
     static INT8 quant_tab[4] = { -1, -2, 1, 2 };
 
-    if(s->mb_x==0) PRINT_MB_TYPE("\n");
-
-    if(s->resync_marker){
-        if(s->resync_mb_x == s->mb_x && s->resync_mb_y+1 == s->mb_y){
-            s->first_slice_line=0; 
-        }
-    }
+    s->error_status_table[s->mb_x + s->mb_y*s->mb_width]= 0;
 
-    if(s->data_partitioning && s->pict_type!=B_TYPE)
-        return mpeg4_decode_partitioned_mb(s, block);
+    if(s->mb_x==0) PRINT_MB_TYPE("\n");
 
     if (s->pict_type == P_TYPE || s->pict_type==S_TYPE) {
         if (get_bits1(&s->gb)) {
@@ -2627,21 +2909,10 @@ int h263_decode_mb(MpegEncContext *s,
             s->mv_dir = MV_DIR_FORWARD;
             s->mv_type = MV_TYPE_16X16;
             if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE){
-                const int a= s->sprite_warping_accuracy;
-//                int l = (1 << (s->f_code - 1)) * 32;
                 PRINT_MB_TYPE("G");
                 s->mcsel=1;
-                if(s->divx_version==500 && s->divx_build==413){
-                    s->mv[0][0][0] = s->sprite_offset[0][0] / (1<<(a-s->quarter_sample));
-                    s->mv[0][0][1] = s->sprite_offset[0][1] / (1<<(a-s->quarter_sample));
-                }else{
-                    s->mv[0][0][0] = RSHIFT(s->sprite_offset[0][0], a-s->quarter_sample);
-                    s->mv[0][0][1] = RSHIFT(s->sprite_offset[0][1], a-s->quarter_sample);
-                }
-/*                if (s->mv[0][0][0] < -l) s->mv[0][0][0]= -l;
-                else if (s->mv[0][0][0] >= l) s->mv[0][0][0]= l-1;
-                if (s->mv[0][0][1] < -l) s->mv[0][0][1]= -l;
-                else if (s->mv[0][0][1] >= l) s->mv[0][0][1]= l-1;*/
+                s->mv[0][0][0]= get_amv(s, 0);
+                s->mv[0][0][1]= get_amv(s, 1);
 
                 s->mb_skiped = 0;
             }else{
@@ -2651,7 +2922,7 @@ int h263_decode_mb(MpegEncContext *s,
                 s->mv[0][0][1] = 0;
                 s->mb_skiped = 1;
             }
-            return 0;
+            goto end;
         }
         cbpc = get_vlc2(&s->gb, inter_MCBPC_vlc.table, INTER_MCBPC_VLC_BITS, 2);
         //fprintf(stderr, "\tCBPC: %d", cbpc);
@@ -2672,32 +2943,19 @@ int h263_decode_mb(MpegEncContext *s,
         cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
         cbp = (cbpc & 3) | ((cbpy ^ 0xf) << 2);
         if (dquant) {
-            s->qscale += quant_tab[get_bits(&s->gb, 2)];
-            if (s->qscale < 1)
-                s->qscale = 1;
-            else if (s->qscale > 31)
-                s->qscale = 31;
-            h263_dc_scale(s);
+            change_qscale(s, quant_tab[get_bits(&s->gb, 2)]);
         }
-        if((!s->progressive_sequence) && (cbp || s->workaround_bugs==2))
+        if((!s->progressive_sequence) && (cbp || (s->workaround_bugs&FF_BUG_XVID_ILACE)))
             s->interlaced_dct= get_bits1(&s->gb);
         
         s->mv_dir = MV_DIR_FORWARD;
         if ((cbpc & 16) == 0) {
             if(s->mcsel){
-                const int a= s->sprite_warping_accuracy;
                 PRINT_MB_TYPE("G");
                 /* 16x16 global motion prediction */
                 s->mv_type = MV_TYPE_16X16;
-//        int l = (1 << (s->f_code - 1)) * 32;
-                if(s->divx_version==500 && s->divx_build==413){
-                    mx = s->sprite_offset[0][0] / (1<<(a-s->quarter_sample));
-                    my = s->sprite_offset[0][1] / (1<<(a-s->quarter_sample));
-                }else{
-                    mx = RSHIFT(s->sprite_offset[0][0], a-s->quarter_sample);
-                    my = RSHIFT(s->sprite_offset[0][1], a-s->quarter_sample);
-                }
-//       int l = (1 << (s->f_code - 1)) * 32;
+                mx= get_amv(s, 0);
+                my= get_amv(s, 1);
                 s->mv[0][0][0] = mx;
                 s->mv[0][0][1] = my;
             }else if((!s->progressive_sequence) && get_bits1(&s->gb)){
@@ -2720,7 +2978,7 @@ int h263_decode_mb(MpegEncContext *s,
                         return -1;
 
                     s->mv[0][i][0] = mx;
-                    s->mv[0][i][1] = my*2;
+                    s->mv[0][i][1] = my;
                 }
             }else{
                 PRINT_MB_TYPE("P");
@@ -2786,10 +3044,12 @@ int h263_decode_mb(MpegEncContext *s,
         s->mcsel=0;      //     ...               true gmc blocks
 
         if(s->mb_x==0){
-            s->last_mv[0][0][0]= 
-            s->last_mv[0][0][1]= 
-            s->last_mv[1][0][0]= 
-            s->last_mv[1][0][1]= 0;
+            for(i=0; i<2; i++){
+                s->last_mv[i][0][0]= 
+                s->last_mv[i][0][1]= 
+                s->last_mv[i][1][0]= 
+                s->last_mv[i][1][1]= 0;
+            }
         }
 
         /* if we skipped it in the future P Frame than skip it now too */
@@ -2807,102 +3067,161 @@ int h263_decode_mb(MpegEncContext *s,
             s->mv[1][0][0] = 0;
             s->mv[1][0][1] = 0;
             PRINT_MB_TYPE("s");
-            return 0;
+            goto end;
         }
 
-        modb1= get_bits1(&s->gb);
-        if(modb1==0){
+        modb1= get_bits1(&s->gb); 
+        if(modb1){
+            mb_type=4; //like MB_TYPE_B_DIRECT but no vectors coded
+            cbp=0;
+        }else{
+            int field_mv;
+        
             modb2= get_bits1(&s->gb);
             mb_type= get_vlc2(&s->gb, mb_type_b_vlc.table, MB_TYPE_B_VLC_BITS, 1);
-            if(modb2==0) cbp= get_bits(&s->gb, 6);
-            else cbp=0;
-            if (mb_type && cbp) {
+            if(modb2) cbp= 0;
+            else      cbp= get_bits(&s->gb, 6);
+
+            if (mb_type!=MB_TYPE_B_DIRECT && cbp) {
                 if(get_bits1(&s->gb)){
-                    s->qscale +=get_bits1(&s->gb)*4 - 2;
-                    if (s->qscale < 1)
-                        s->qscale = 1;
-                    else if (s->qscale > 31)
-                        s->qscale = 31;
-                    h263_dc_scale(s);
+                    change_qscale(s, get_bits1(&s->gb)*4 - 2);
                 }
             }
-//            decode_interlaced_info(s, cbp, mb_type);
-        }else{
-            mb_type=4; //like 0 but no vectors coded
-            cbp=0;
+            field_mv=0;
+
+            if(!s->progressive_sequence){
+                if(cbp)
+                    s->interlaced_dct= get_bits1(&s->gb);
+
+                if(mb_type!=MB_TYPE_B_DIRECT && get_bits1(&s->gb)){
+                    field_mv=1;
+
+                    if(mb_type!=MB_TYPE_B_BACKW){
+                        s->field_select[0][0]= get_bits1(&s->gb);
+                        s->field_select[0][1]= get_bits1(&s->gb);
+                    }
+                    if(mb_type!=MB_TYPE_B_FORW){
+                        s->field_select[1][0]= get_bits1(&s->gb);
+                        s->field_select[1][1]= get_bits1(&s->gb);
+                    }
+                }
+            }
+
+            s->mv_dir = 0;
+            if(mb_type!=MB_TYPE_B_DIRECT && !field_mv){
+                s->mv_type= MV_TYPE_16X16;
+                if(mb_type!=MB_TYPE_B_BACKW){
+                    s->mv_dir = MV_DIR_FORWARD;
+
+                    mx = h263_decode_motion(s, s->last_mv[0][0][0], s->f_code);
+                    my = h263_decode_motion(s, s->last_mv[0][0][1], s->f_code);
+                    s->last_mv[0][1][0]= s->last_mv[0][0][0]= s->mv[0][0][0] = mx;
+                    s->last_mv[0][1][1]= s->last_mv[0][0][1]= s->mv[0][0][1] = my;
+                }
+    
+                if(mb_type!=MB_TYPE_B_FORW){
+                    s->mv_dir |= MV_DIR_BACKWARD;
+
+                    mx = h263_decode_motion(s, s->last_mv[1][0][0], s->b_code);
+                    my = h263_decode_motion(s, s->last_mv[1][0][1], s->b_code);
+                    s->last_mv[1][1][0]= s->last_mv[1][0][0]= s->mv[1][0][0] = mx;
+                    s->last_mv[1][1][1]= s->last_mv[1][0][1]= s->mv[1][0][1] = my;
+                }
+                if(mb_type!=MB_TYPE_B_DIRECT)
+                    PRINT_MB_TYPE(mb_type==MB_TYPE_B_FORW ? "F" : (mb_type==MB_TYPE_B_BACKW ? "B" : "T"));
+            }else if(mb_type!=MB_TYPE_B_DIRECT){
+                s->mv_type= MV_TYPE_FIELD;
+
+                if(mb_type!=MB_TYPE_B_BACKW){
+                    s->mv_dir = MV_DIR_FORWARD;
+                
+                    for(i=0; i<2; i++){
+                        mx = h263_decode_motion(s, s->last_mv[0][i][0]  , s->f_code);
+                        my = h263_decode_motion(s, s->last_mv[0][i][1]/2, s->f_code);
+                        s->last_mv[0][i][0]=  s->mv[0][i][0] = mx;
+                        s->last_mv[0][i][1]= (s->mv[0][i][1] = my)*2;
+                    }
+                }
+    
+                if(mb_type!=MB_TYPE_B_FORW){
+                    s->mv_dir |= MV_DIR_BACKWARD;
+
+                    for(i=0; i<2; i++){
+                        mx = h263_decode_motion(s, s->last_mv[1][i][0]  , s->b_code);
+                        my = h263_decode_motion(s, s->last_mv[1][i][1]/2, s->b_code);
+                        s->last_mv[1][i][0]=  s->mv[1][i][0] = mx;
+                        s->last_mv[1][i][1]= (s->mv[1][i][1] = my)*2;
+                    }
+                }
+                if(mb_type!=MB_TYPE_B_DIRECT)
+                    PRINT_MB_TYPE(mb_type==MB_TYPE_B_FORW ? "f" : (mb_type==MB_TYPE_B_BACKW ? "b" : "t"));
+            }
         }
-        s->mv_type = MV_TYPE_16X16; // we'll switch to 8x8 only if the last P frame had 8x8 for this MB and mb_type=0 here
-        mx=my=0; //for case 4, we could put this to the mb_type=4 but than gcc compains about uninitalized mx/my
-        switch(mb_type)
-        {
-        case 0: /* direct */
-            mx = h263_decode_motion(s, 0, 1);
-            my = h263_decode_motion(s, 0, 1);
-        case 4: /* direct with mx=my=0 */
+          
+        if(mb_type==4 || mb_type==MB_TYPE_B_DIRECT){
+            int mb_index= s->mb_x + s->mb_y*s->mb_width;
+            int i;
+            
+            if(mb_type==4)
+                mx=my=0;
+            else{
+                mx = h263_decode_motion(s, 0, 1);
+                my = h263_decode_motion(s, 0, 1);
+            }
             s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
             xy= s->block_index[0];
             time_pp= s->pp_time;
-            time_pb= time_pp - s->bp_time;
-//if(time_pp>3000 )printf("%d %d  ", time_pp, time_pb);
+            time_pb= s->pb_time;
+            
             //FIXME avoid divides
-            s->mv[0][0][0] = s->motion_val[xy][0]*time_pb/time_pp + mx;
-            s->mv[0][0][1] = s->motion_val[xy][1]*time_pb/time_pp + my;
-            s->mv[1][0][0] = mx ? s->mv[0][0][0] - s->motion_val[xy][0]
-                                : s->motion_val[xy][0]*(time_pb - time_pp)/time_pp + mx;
-            s->mv[1][0][1] = my ? s->mv[0][0][1] - s->motion_val[xy][1] 
-                                : s->motion_val[xy][1]*(time_pb - time_pp)/time_pp + my;
-            if(s->non_b_mv4_table[xy]){
-                int i;
+            switch(s->co_located_type_table[mb_index]){
+            case 0:
+                s->mv_type= MV_TYPE_16X16;
+                s->mv[0][0][0] = s->motion_val[xy][0]*time_pb/time_pp + mx;
+                s->mv[0][0][1] = s->motion_val[xy][1]*time_pb/time_pp + my;
+                s->mv[1][0][0] = mx ? s->mv[0][0][0] - s->motion_val[xy][0]
+                                    : s->motion_val[xy][0]*(time_pb - time_pp)/time_pp;
+                s->mv[1][0][1] = my ? s->mv[0][0][1] - s->motion_val[xy][1] 
+                                    : s->motion_val[xy][1]*(time_pb - time_pp)/time_pp;
+                PRINT_MB_TYPE(mb_type==4 ? "D" : "S");
+                break;
+            case CO_LOCATED_TYPE_4MV:
                 s->mv_type = MV_TYPE_8X8;
-                for(i=1; i<4; i++){
+                for(i=0; i<4; i++){
                     xy= s->block_index[i];
                     s->mv[0][i][0] = s->motion_val[xy][0]*time_pb/time_pp + mx;
                     s->mv[0][i][1] = s->motion_val[xy][1]*time_pb/time_pp + my;
                     s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->motion_val[xy][0]
-                                        : s->motion_val[xy][0]*(time_pb - time_pp)/time_pp + mx;
+                                        : s->motion_val[xy][0]*(time_pb - time_pp)/time_pp;
                     s->mv[1][i][1] = my ? s->mv[0][i][1] - s->motion_val[xy][1] 
-                                        : s->motion_val[xy][1]*(time_pb - time_pp)/time_pp + my;
+                                        : s->motion_val[xy][1]*(time_pb - time_pp)/time_pp;
                 }
                 PRINT_MB_TYPE("4");
-            }else{
-                PRINT_MB_TYPE(mb_type==4 ? "D" : "S");
+                break;
+            case CO_LOCATED_TYPE_FIELDMV:
+                s->mv_type = MV_TYPE_FIELD;
+                for(i=0; i<2; i++){
+                    if(s->top_field_first){
+                        time_pp= s->pp_field_time - s->field_select_table[mb_index][i] + i;
+                        time_pb= s->pb_field_time - s->field_select_table[mb_index][i] + i;
+                    }else{
+                        time_pp= s->pp_field_time + s->field_select_table[mb_index][i] - i;
+                        time_pb= s->pb_field_time + s->field_select_table[mb_index][i] - i;
+                    }
+                    s->mv[0][i][0] = s->field_mv_table[mb_index][i][0]*time_pb/time_pp + mx;
+                    s->mv[0][i][1] = s->field_mv_table[mb_index][i][1]*time_pb/time_pp + my;
+                    s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->field_mv_table[mb_index][i][0]
+                                        : s->field_mv_table[mb_index][i][0]*(time_pb - time_pp)/time_pp;
+                    s->mv[1][i][1] = my ? s->mv[0][i][1] - s->field_mv_table[mb_index][i][1] 
+                                        : s->field_mv_table[mb_index][i][1]*(time_pb - time_pp)/time_pp;
+                }
+                PRINT_MB_TYPE("=");
+                break;
             }
-/*            s->mv[0][0][0] = 
-            s->mv[0][0][1] = 
-            s->mv[1][0][0] = 
-            s->mv[1][0][1] = 1000;*/
-            break;
-//FIXME additional MVs for interlaced stuff
-        case 1: 
-            s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
-            mx = h263_decode_motion(s, s->last_mv[0][0][0], s->f_code);
-            my = h263_decode_motion(s, s->last_mv[0][0][1], s->f_code);
-            s->last_mv[0][0][0]= s->mv[0][0][0] = mx;
-            s->last_mv[0][0][1]= s->mv[0][0][1] = my;
-
-            mx = h263_decode_motion(s, s->last_mv[1][0][0], s->b_code);
-            my = h263_decode_motion(s, s->last_mv[1][0][1], s->b_code);
-            s->last_mv[1][0][0]= s->mv[1][0][0] = mx;
-            s->last_mv[1][0][1]= s->mv[1][0][1] = my;
-            PRINT_MB_TYPE("i");
-            break;
-        case 2: 
-            s->mv_dir = MV_DIR_BACKWARD;
-            mx = h263_decode_motion(s, s->last_mv[1][0][0], s->b_code);
-            my = h263_decode_motion(s, s->last_mv[1][0][1], s->b_code);
-            s->last_mv[1][0][0]= s->mv[1][0][0] = mx;
-            s->last_mv[1][0][1]= s->mv[1][0][1] = my;
-            PRINT_MB_TYPE("B");
-            break;
-        case 3:
-            s->mv_dir = MV_DIR_FORWARD;
-            mx = h263_decode_motion(s, s->last_mv[0][0][0], s->f_code);
-            my = h263_decode_motion(s, s->last_mv[0][0][1], s->f_code);
-            s->last_mv[0][0][0]= s->mv[0][0][0] = mx;
-            s->last_mv[0][0][1]= s->mv[0][0][1] = my;
-            PRINT_MB_TYPE("F");
-            break;
-        default: 
+        }
+        
+        if(mb_type<0 || mb_type>4){
             printf("illegal MB_type\n");
             return -1;
         }
@@ -2925,56 +3244,76 @@ intra:
         if(cbpy<0) return -1;
         cbp = (cbpc & 3) | (cbpy << 2);
         if (dquant) {
-            s->qscale += quant_tab[get_bits(&s->gb, 2)];
-            if (s->qscale < 1)
-                s->qscale = 1;
-            else if (s->qscale > 31)
-                s->qscale = 31;
-            h263_dc_scale(s);
+            change_qscale(s, quant_tab[get_bits(&s->gb, 2)]);
         }
+        
         if(!s->progressive_sequence)
             s->interlaced_dct= get_bits1(&s->gb);
 
         /* decode each block */
         if (s->h263_pred) {
             for (i = 0; i < 6; i++) {
-                if (mpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, 1) < 0)
+                if (mpeg4_decode_block(s, block[i], i, cbp&32, 1) < 0)
                     return -1;
+                cbp+=cbp;
             }
         } else {
             for (i = 0; i < 6; i++) {
-                if (h263_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0)
+                if (h263_decode_block(s, block[i], i, cbp&32) < 0)
                     return -1;
+                cbp+=cbp;
             }
         }
-        return 0;
+        goto end;
     }
 
     /* decode each block */
     if (s->h263_pred) {
         for (i = 0; i < 6; i++) {
-            if (mpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, 0) < 0)
+            if (mpeg4_decode_block(s, block[i], i, cbp&32, 0) < 0)
                 return -1;
+            cbp+=cbp;
         }
     } else {
         for (i = 0; i < 6; i++) {
-            if (h263_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0)
+            if (h263_decode_block(s, block[i], i, cbp&32) < 0)
                 return -1;
+            cbp+=cbp;
         }
     }
-    return 0;
+end:
+
+        /* per-MB end of slice check */
+    if(s->codec_id==CODEC_ID_MPEG4){
+        if(mpeg4_is_resync(s)){
+            if(s->pict_type==B_TYPE && s->mbskip_table[s->mb_y * s->mb_width + s->mb_x+1])
+                return SLICE_OK;
+            return SLICE_END;
+        }
+    }else{
+        if(get_bits_count(&s->gb) + 7 >= s->gb.size*8){
+            int v= show_bits(&s->gb, 8) >> (((get_bits_count(&s->gb)-1)&7)+1);
+            if(v==0)
+                return SLICE_END;
+        }else{
+            if(show_bits(&s->gb, 16)==0)
+                return SLICE_END; 
+        }
+    }
+
+    return SLICE_OK;     
 }
 
 static int h263_decode_motion(MpegEncContext * s, int pred, int f_code)
 {
-    int code, val, sign, shift, l, m;
-
+    int code, val, sign, shift, l;
     code = get_vlc2(&s->gb, mv_vlc.table, MV_VLC_BITS, 2);
     if (code < 0)
         return 0xffff;
 
     if (code == 0)
         return pred;
+
     sign = get_bits1(&s->gb);
     shift = f_code - 1;
     val = (code - 1) << shift;
@@ -2987,12 +3326,11 @@ static int h263_decode_motion(MpegEncContext * s, int pred, int f_code)
 
     /* modulo decoding */
     if (!s->h263_long_vectors) {
-        l = (1 << (f_code - 1)) * 32;
-        m = 2 * l;
+        l = 1 << (f_code + 4);
         if (val < -l) {
-            val += m;
+            val += l<<1;
         } else if (val >= l) {
-            val -= m;
+            val -= l<<1;
         }
     } else {
         /* horrible h263 long vector mode */
@@ -3038,15 +3376,15 @@ static int h263_decode_block(MpegEncContext * s, DCTELEM * block,
     RLTable *rl = &rl_inter;
     const UINT8 *scan_table;
 
-    scan_table = zigzag_direct;
+    scan_table = s->intra_scantable.permutated;
     if (s->h263_aic && s->mb_intra) {
         rl = &rl_intra_aic;
         i = 0;
         if (s->ac_pred) {
             if (s->h263_aic_dir) 
-                scan_table = ff_alternate_vertical_scan; /* left */
+                scan_table = s->intra_v_scantable.permutated; /* left */
             else
-                scan_table = ff_alternate_horizontal_scan; /* top */
+                scan_table = s->intra_h_scantable.permutated; /* top */
         }
     } else if (s->mb_intra) {
         /* DC coef */
@@ -3083,8 +3421,10 @@ static int h263_decode_block(MpegEncContext * s, DCTELEM * block,
 
     for(;;) {
         code = get_vlc2(&s->gb, rl->vlc.table, TEX_VLC_BITS, 2);
-        if (code < 0)
+        if (code < 0){
+            fprintf(stderr, "illegal ac vlc code at %dx%d\n", s->mb_x, s->mb_y);
             return -1;
+        }
         if (code == rl->n) {
             /* escape */
             last = get_bits1(&s->gb);
@@ -3093,7 +3433,7 @@ static int h263_decode_block(MpegEncContext * s, DCTELEM * block,
             if (s->h263_rv10 && level == -128) {
                 /* XXX: should patch encoder too */
                 level = get_bits(&s->gb, 12);
-                level= (level + ((-1)<<11)) ^ ((-1)<<11); //sign extension
+               level= (level + ((-1)<<11)) ^ ((-1)<<11); //sign extension
             }
         } else {
             run = rl->table_run[code];
@@ -3103,8 +3443,10 @@ static int h263_decode_block(MpegEncContext * s, DCTELEM * block,
                 level = -level;
         }
         i += run;
-        if (i >= 64)
+        if (i >= 64){
+            fprintf(stderr, "run overflow at %dx%d\n", s->mb_x, s->mb_y);
             return -1;
+        }
         j = scan_table[i];
         block[j] = level;
         if (last)
@@ -3141,29 +3483,39 @@ static inline int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr)
             level = - (level ^ ((1 << code) - 1));
         if (code > 8){
             if(get_bits1(&s->gb)==0){ /* marker */
-                fprintf(stderr, "dc marker bit missing\n");
-                return -1;
+                if(s->error_resilience>=2){
+                    fprintf(stderr, "dc marker bit missing\n");
+                    return -1;
+                }
             }
         }
     }
-
     pred = ff_mpeg4_pred_dc(s, n, &dc_val, dir_ptr);
     level += pred;
-    if (level < 0)
+    if (level < 0){
+        if(s->error_resilience>=3){
+            fprintf(stderr, "dc<0 at %dx%d\n", s->mb_x, s->mb_y);
+            return -1;
+        }
         level = 0;
+    }
     if (n < 4) {
         *dc_val = level * s->y_dc_scale;
     } else {
         *dc_val = level * s->c_dc_scale;
     }
+    if(s->error_resilience>=3){
+        if(*dc_val > 2048 + s->y_dc_scale + s->c_dc_scale){
+            fprintf(stderr, "dc overflow at %dx%d\n", s->mb_x, s->mb_y);
+            return -1;
+        }
+    }
     return level;
 }
 
 /**
- * decode a block
- * returns 0 if everything went ok
- * returns DECODING_AC_LOST   if an error was detected during AC decoding
- * returns DECODING_ACDC_LOST if an error was detected during DC decoding
+ * decodes a block.
+ * @return <0 if an error occurred
  */
 static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
                               int n, int coded, int intra)
@@ -3177,7 +3529,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
 
     if(intra) {
        /* DC coef */
-        if(s->data_partitioning && s->pict_type!=B_TYPE){
+        if(s->partitioned_frame){
             level = s->dc_val[0][ s->block_index[n] ];
             if(n<4) level= (level + (s->y_dc_scale>>1))/s->y_dc_scale; //FIXME optimizs
             else    level= (level + (s->c_dc_scale>>1))/s->c_dc_scale;
@@ -3185,7 +3537,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
         }else{
             level = mpeg4_decode_dc(s, n, &dc_pred_dir);
             if (level < 0)
-                return DECODING_ACDC_LOST;
+                return -1;
         }
         block[0] = level;
         i = 0;
@@ -3195,11 +3547,11 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
         rl_vlc = rl_intra.rl_vlc[0];
         if (s->ac_pred) {
             if (dc_pred_dir == 0) 
-                scan_table = ff_alternate_vertical_scan; /* left */
+                scan_table = s->intra_v_scantable.permutated; /* left */
             else
-                scan_table = ff_alternate_horizontal_scan; /* top */
+                scan_table = s->intra_h_scantable.permutated; /* top */
         } else {
-            scan_table = zigzag_direct;
+            scan_table = s->intra_scantable.permutated;
         }
         qmul=1;
         qadd=0;
@@ -3210,7 +3562,9 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
             return 0;
         }
         rl = &rl_inter;
-        scan_table = zigzag_direct;
+   
+        scan_table = s->intra_scantable.permutated;
+
         if(s->mpeg_quant){
             qmul=1;
             qadd=0;
@@ -3241,38 +3595,38 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
 
                     if(SHOW_UBITS(re, &s->gb, 1)==0){
                         fprintf(stderr, "1. marker bit missing in 3. esc\n");
-                        return DECODING_AC_LOST;
+                        return -1;
                     }; SKIP_CACHE(re, &s->gb, 1);
                     
                     level= SHOW_SBITS(re, &s->gb, 12); SKIP_CACHE(re, &s->gb, 12);
  
                     if(SHOW_UBITS(re, &s->gb, 1)==0){
                         fprintf(stderr, "2. marker bit missing in 3. esc\n");
-                        return DECODING_AC_LOST;
+                        return -1;
                     }; LAST_SKIP_CACHE(re, &s->gb, 1);
                     
                     SKIP_COUNTER(re, &s->gb, 1+12+1);
                     
                     if(level*s->qscale>1024 || level*s->qscale<-1024){
                         fprintf(stderr, "|level| overflow in 3. esc, qp=%d\n", s->qscale);
-                        return DECODING_AC_LOST;
+                        return -1;
                     }
 #if 1 
                     {
                         const int abs_level= ABS(level);
-                        if(abs_level<=MAX_LEVEL && run<=MAX_RUN && s->error_resilience>=0){
+                        if(abs_level<=MAX_LEVEL && run<=MAX_RUN && ((s->workaround_bugs&FF_BUG_AC_VLC)==0)){
                             const int run1= run - rl->max_run[last][abs_level] - 1;
                             if(abs_level <= rl->max_level[last][run]){
                                 fprintf(stderr, "illegal 3. esc, vlc encoding possible\n");
-                                return DECODING_AC_LOST;
+                                return -1;
                             }
                             if(abs_level <= rl->max_level[last][run]*2){
                                 fprintf(stderr, "illegal 3. esc, esc 1 encoding possible\n");
-                                return DECODING_AC_LOST;
+                                return -1;
                             }
                             if(run1 >= 0 && abs_level <= rl->max_level[last][run1]){
                                 fprintf(stderr, "illegal 3. esc, esc 2 encoding possible\n");
-                                return DECODING_AC_LOST;
+                                return -1;
                             }
                         }
                     }
@@ -3318,7 +3672,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
             i-= 192;
             if(i&(~63)){
                 fprintf(stderr, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
-                return DECODING_AC_LOST;
+                return -1;
             }
 
             block[scan_table[i]] = level;
@@ -3475,10 +3829,10 @@ int h263_decode_picture_header(MpegEncContext *s)
                 skip_bits1(&s->gb);
                 height = get_bits(&s->gb, 9) * 4;
                 dprintf("\nH.263+ Custom picture: %dx%d\n",width,height);
-                if (s->aspect_ratio_info == EXTENDED_PAR) {
+                if (s->aspect_ratio_info == FF_ASPECT_EXTENDED) {
                     /* aspected dimensions */
-                    skip_bits(&s->gb, 8); /* width */
-                    skip_bits(&s->gb, 8); /* height */
+                   s->aspected_width = get_bits(&s->gb, 8);
+                   s->aspected_height = get_bits(&s->gb, 8);
                 }
             } else {
                 width = h263_format[format][0];
@@ -3522,11 +3876,12 @@ static void mpeg4_decode_sprite_trajectory(MpegEncContext * s)
     int d[4][2]={{0,0}, {0,0}, {0,0}, {0,0}};
     int sprite_ref[4][2];
     int virtual_ref[2][2];
-    int w2, h2;
+    int w2, h2, w3, h3;
     int alpha=0, beta=0;
     int w= s->width;
     int h= s->height;
-//printf("SP %d\n", s->sprite_warping_accuracy);
+    int min_ab;
+
     for(i=0; i<s->num_sprite_warping_points; i++){
         int length;
         int x=0, y=0;
@@ -3534,7 +3889,7 @@ static void mpeg4_decode_sprite_trajectory(MpegEncContext * s)
         length= get_vlc(&s->gb, &sprite_trajectory);
         if(length){
             x= get_bits(&s->gb, length);
-//printf("lx %d %d\n", length, x);
+
             if ((x >> (length - 1)) == 0) /* if MSB not set it is negative*/
                 x = - (x ^ ((1 << length) - 1));
         }
@@ -3543,14 +3898,12 @@ static void mpeg4_decode_sprite_trajectory(MpegEncContext * s)
         length= get_vlc(&s->gb, &sprite_trajectory);
         if(length){
             y=get_bits(&s->gb, length);
-//printf("ly %d %d\n", length, y);
+
             if ((y >> (length - 1)) == 0) /* if MSB not set it is negative*/
                 y = - (y ^ ((1 << length) - 1));
         }
         skip_bits1(&s->gb); /* marker bit */
 //printf("%d %d %d %d\n", x, y, i, s->sprite_warping_accuracy);
-//if(i>0 && (x!=0 || y!=0)) printf("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n");
-//x=y=0;
         d[i][0]= x;
         d[i][1]= y;
     }
@@ -3591,7 +3944,7 @@ static void mpeg4_decode_sprite_trajectory(MpegEncContext * s)
         + ROUNDED_DIV(((h - h2)*(r*sprite_ref[0][0] - 16*vop_ref[0][0]) + h2*(r*sprite_ref[2][0] - 16*vop_ref[2][0])),h);
     virtual_ref[1][1]= 16*(vop_ref[0][1] + h2) 
         + ROUNDED_DIV(((h - h2)*(r*sprite_ref[0][1] - 16*vop_ref[0][1]) + h2*(r*sprite_ref[2][1] - 16*vop_ref[2][1])),h);
-
+        
     switch(s->num_sprite_warping_points)
     {
         case 0:
@@ -3599,417 +3952,508 @@ static void mpeg4_decode_sprite_trajectory(MpegEncContext * s)
             s->sprite_offset[0][1]= 0;
             s->sprite_offset[1][0]= 0;
             s->sprite_offset[1][1]= 0;
-            s->sprite_delta[0][0][0]= a;
-            s->sprite_delta[0][0][1]= 0;
-            s->sprite_delta[0][1][0]= 0;
-            s->sprite_delta[0][1][1]= a;
-            s->sprite_delta[1][0][0]= a;
-            s->sprite_delta[1][0][1]= 0;
-            s->sprite_delta[1][1][0]= 0;
-            s->sprite_delta[1][1][1]= a;
-            s->sprite_shift[0][0]= 0;
-            s->sprite_shift[0][1]= 0;
-            s->sprite_shift[1][0]= 0;
-            s->sprite_shift[1][1]= 0;
+            s->sprite_delta[0][0]= a;
+            s->sprite_delta[0][1]= 0;
+            s->sprite_delta[1][0]= 0;
+            s->sprite_delta[1][1]= a;
+            s->sprite_shift[0]= 0;
+            s->sprite_shift[1]= 0;
             break;
         case 1: //GMC only
             s->sprite_offset[0][0]= sprite_ref[0][0] - a*vop_ref[0][0];
             s->sprite_offset[0][1]= sprite_ref[0][1] - a*vop_ref[0][1];
             s->sprite_offset[1][0]= ((sprite_ref[0][0]>>1)|(sprite_ref[0][0]&1)) - a*(vop_ref[0][0]/2);
             s->sprite_offset[1][1]= ((sprite_ref[0][1]>>1)|(sprite_ref[0][1]&1)) - a*(vop_ref[0][1]/2);
-            s->sprite_delta[0][0][0]= a;
-            s->sprite_delta[0][0][1]= 0;
-            s->sprite_delta[0][1][0]= 0;
-            s->sprite_delta[0][1][1]= a;
-            s->sprite_delta[1][0][0]= a;
-            s->sprite_delta[1][0][1]= 0;
-            s->sprite_delta[1][1][0]= 0;
-            s->sprite_delta[1][1][1]= a;
-            s->sprite_shift[0][0]= 0;
-            s->sprite_shift[0][1]= 0;
-            s->sprite_shift[1][0]= 0;
-            s->sprite_shift[1][1]= 0;
+            s->sprite_delta[0][0]= a;
+            s->sprite_delta[0][1]= 0;
+            s->sprite_delta[1][0]= 0;
+            s->sprite_delta[1][1]= a;
+            s->sprite_shift[0]= 0;
+            s->sprite_shift[1]= 0;
             break;
         case 2:
-        case 3: //FIXME
             s->sprite_offset[0][0]= (sprite_ref[0][0]<<(alpha+rho))
-                                                  + ((-r*sprite_ref[0][0] + virtual_ref[0][0])*(-vop_ref[0][0])
-                                                    +( r*sprite_ref[0][1] - virtual_ref[0][1])*(-vop_ref[0][1]));
+                                                  + (-r*sprite_ref[0][0] + virtual_ref[0][0])*(-vop_ref[0][0])
+                                                  + ( r*sprite_ref[0][1] - virtual_ref[0][1])*(-vop_ref[0][1])
+                                                  + (1<<(alpha+rho-1));
             s->sprite_offset[0][1]= (sprite_ref[0][1]<<(alpha+rho))
-                                                  + ((-r*sprite_ref[0][1] + virtual_ref[0][1])*(-vop_ref[0][0])
-                                                    +(-r*sprite_ref[0][0] + virtual_ref[0][0])*(-vop_ref[0][1]));
-            s->sprite_offset[1][0]= ((-r*sprite_ref[0][0] + virtual_ref[0][0])*(-2*vop_ref[0][0] + 1)
-                                 +( r*sprite_ref[0][1] - virtual_ref[0][1])*(-2*vop_ref[0][1] + 1)
-                                 +2*w2*r*sprite_ref[0][0] - 16*w2);
-            s->sprite_offset[1][1]= ((-r*sprite_ref[0][1] + virtual_ref[0][1])*(-2*vop_ref[0][0] + 1) 
-                                 +(-r*sprite_ref[0][0] + virtual_ref[0][0])*(-2*vop_ref[0][1] + 1)
-                                 +2*w2*r*sprite_ref[0][1] - 16*w2);
-            s->sprite_delta[0][0][0]=   (-r*sprite_ref[0][0] + virtual_ref[0][0]);
-            s->sprite_delta[0][0][1]=   ( r*sprite_ref[0][1] - virtual_ref[0][1]);
-            s->sprite_delta[0][1][0]=   (-r*sprite_ref[0][1] + virtual_ref[0][1]);
-            s->sprite_delta[0][1][1]=   (-r*sprite_ref[0][0] + virtual_ref[0][0]);
-            s->sprite_delta[1][0][0]= 4*(-r*sprite_ref[0][0] + virtual_ref[0][0]);
-            s->sprite_delta[1][0][1]= 4*( r*sprite_ref[0][1] - virtual_ref[0][1]);
-            s->sprite_delta[1][1][0]= 4*(-r*sprite_ref[0][1] + virtual_ref[0][1]);
-            s->sprite_delta[1][1][1]= 4*(-r*sprite_ref[0][0] + virtual_ref[0][0]);
-            s->sprite_shift[0][0]= alpha+rho;
-            s->sprite_shift[0][1]= alpha+rho;
-            s->sprite_shift[1][0]= alpha+rho+2;
-            s->sprite_shift[1][1]= alpha+rho+2;
+                                                  + (-r*sprite_ref[0][1] + virtual_ref[0][1])*(-vop_ref[0][0])
+                                                  + (-r*sprite_ref[0][0] + virtual_ref[0][0])*(-vop_ref[0][1])
+                                                  + (1<<(alpha+rho-1));
+            s->sprite_offset[1][0]= ( (-r*sprite_ref[0][0] + virtual_ref[0][0])*(-2*vop_ref[0][0] + 1)
+                                     +( r*sprite_ref[0][1] - virtual_ref[0][1])*(-2*vop_ref[0][1] + 1)
+                                     +2*w2*r*sprite_ref[0][0] 
+                                     - 16*w2 
+                                     + (1<<(alpha+rho+1)));
+            s->sprite_offset[1][1]= ( (-r*sprite_ref[0][1] + virtual_ref[0][1])*(-2*vop_ref[0][0] + 1) 
+                                     +(-r*sprite_ref[0][0] + virtual_ref[0][0])*(-2*vop_ref[0][1] + 1)
+                                     +2*w2*r*sprite_ref[0][1] 
+                                     - 16*w2
+                                     + (1<<(alpha+rho+1)));
+            s->sprite_delta[0][0]=   (-r*sprite_ref[0][0] + virtual_ref[0][0]);
+            s->sprite_delta[0][1]=   (+r*sprite_ref[0][1] - virtual_ref[0][1]);
+            s->sprite_delta[1][0]=   (-r*sprite_ref[0][1] + virtual_ref[0][1]);
+            s->sprite_delta[1][1]=   (-r*sprite_ref[0][0] + virtual_ref[0][0]);
+            
+            s->sprite_shift[0]= alpha+rho;
+            s->sprite_shift[1]= alpha+rho+2;
             break;
-//        case 3:
+        case 3:
+            min_ab= FFMIN(alpha, beta);
+            w3= w2>>min_ab;
+            h3= h2>>min_ab;
+            s->sprite_offset[0][0]=  (sprite_ref[0][0]<<(alpha+beta+rho-min_ab))
+                                   + (-r*sprite_ref[0][0] + virtual_ref[0][0])*h3*(-vop_ref[0][0])
+                                   + (-r*sprite_ref[0][0] + virtual_ref[1][0])*w3*(-vop_ref[0][1])
+                                   + (1<<(alpha+beta+rho-min_ab-1));
+            s->sprite_offset[0][1]=  (sprite_ref[0][1]<<(alpha+beta+rho-min_ab))
+                                   + (-r*sprite_ref[0][1] + virtual_ref[0][1])*h3*(-vop_ref[0][0])
+                                   + (-r*sprite_ref[0][1] + virtual_ref[1][1])*w3*(-vop_ref[0][1])
+                                   + (1<<(alpha+beta+rho-min_ab-1));
+            s->sprite_offset[1][0]=  (-r*sprite_ref[0][0] + virtual_ref[0][0])*h3*(-2*vop_ref[0][0] + 1)
+                                   + (-r*sprite_ref[0][0] + virtual_ref[1][0])*w3*(-2*vop_ref[0][1] + 1)
+                                   + 2*w2*h3*r*sprite_ref[0][0]
+                                   - 16*w2*h3
+                                   + (1<<(alpha+beta+rho-min_ab+1));
+            s->sprite_offset[1][1]=  (-r*sprite_ref[0][1] + virtual_ref[0][1])*h3*(-2*vop_ref[0][0] + 1)
+                                   + (-r*sprite_ref[0][1] + virtual_ref[1][1])*w3*(-2*vop_ref[0][1] + 1)
+                                   + 2*w2*h3*r*sprite_ref[0][1]
+                                   - 16*w2*h3
+                                   + (1<<(alpha+beta+rho-min_ab+1));
+            s->sprite_delta[0][0]=   (-r*sprite_ref[0][0] + virtual_ref[0][0])*h3;
+            s->sprite_delta[0][1]=   (-r*sprite_ref[0][0] + virtual_ref[1][0])*w3;
+            s->sprite_delta[1][0]=   (-r*sprite_ref[0][1] + virtual_ref[0][1])*h3;
+            s->sprite_delta[1][1]=   (-r*sprite_ref[0][1] + virtual_ref[1][1])*w3;
+                                   
+            s->sprite_shift[0]= alpha + beta + rho - min_ab;
+            s->sprite_shift[1]= alpha + beta + rho - min_ab + 2;
             break;
     }
-/*printf("%d %d\n", s->sprite_delta[0][0][0], a<<s->sprite_shift[0][0]);
-printf("%d %d\n", s->sprite_delta[0][0][1], 0);
-printf("%d %d\n", s->sprite_delta[0][1][0], 0);
-printf("%d %d\n", s->sprite_delta[0][1][1], a<<s->sprite_shift[0][1]);
-printf("%d %d\n", s->sprite_delta[1][0][0], a<<s->sprite_shift[1][0]);
-printf("%d %d\n", s->sprite_delta[1][0][1], 0);
-printf("%d %d\n", s->sprite_delta[1][1][0], 0);
-printf("%d %d\n", s->sprite_delta[1][1][1], a<<s->sprite_shift[1][1]);*/
     /* try to simplify the situation */ 
-    if(   s->sprite_delta[0][0][0] == a<<s->sprite_shift[0][0]
-       && s->sprite_delta[0][0][1] == 0
-       && s->sprite_delta[0][1][0] == 0
-       && s->sprite_delta[0][1][1] == a<<s->sprite_shift[0][1]
-       && s->sprite_delta[1][0][0] == a<<s->sprite_shift[1][0]
-       && s->sprite_delta[1][0][1] == 0
-       && s->sprite_delta[1][1][0] == 0
-       && s->sprite_delta[1][1][1] == a<<s->sprite_shift[1][1])
+    if(   s->sprite_delta[0][0] == a<<s->sprite_shift[0]
+       && s->sprite_delta[0][1] == 0
+       && s->sprite_delta[1][0] == 0
+       && s->sprite_delta[1][1] == a<<s->sprite_shift[0])
     {
-        s->sprite_offset[0][0]>>=s->sprite_shift[0][0];
-        s->sprite_offset[0][1]>>=s->sprite_shift[0][1];
-        s->sprite_offset[1][0]>>=s->sprite_shift[1][0];
-        s->sprite_offset[1][1]>>=s->sprite_shift[1][1];
-        s->sprite_delta[0][0][0]= a;
-        s->sprite_delta[0][0][1]= 0;
-        s->sprite_delta[0][1][0]= 0;
-        s->sprite_delta[0][1][1]= a;
-        s->sprite_delta[1][0][0]= a;
-        s->sprite_delta[1][0][1]= 0;
-        s->sprite_delta[1][1][0]= 0;
-        s->sprite_delta[1][1][1]= a;
-        s->sprite_shift[0][0]= 0;
-        s->sprite_shift[0][1]= 0;
-        s->sprite_shift[1][0]= 0;
-        s->sprite_shift[1][1]= 0;
+        s->sprite_offset[0][0]>>=s->sprite_shift[0];
+        s->sprite_offset[0][1]>>=s->sprite_shift[0];
+        s->sprite_offset[1][0]>>=s->sprite_shift[1];
+        s->sprite_offset[1][1]>>=s->sprite_shift[1];
+        s->sprite_delta[0][0]= a;
+        s->sprite_delta[0][1]= 0;
+        s->sprite_delta[1][0]= 0;
+        s->sprite_delta[1][1]= a;
+        s->sprite_shift[0]= 0;
+        s->sprite_shift[1]= 0;
         s->real_sprite_warping_points=1;
     }
-    else
+    else{
+        int shift_y= 16 - s->sprite_shift[0];
+        int shift_c= 16 - s->sprite_shift[1];
+//printf("shifts %d %d\n", shift_y, shift_c);
+        for(i=0; i<2; i++){
+            s->sprite_offset[0][i]<<= shift_y;
+            s->sprite_offset[1][i]<<= shift_c;
+            s->sprite_delta[0][i]<<= shift_y;
+            s->sprite_delta[1][i]<<= shift_y;
+            s->sprite_shift[i]= 16;
+        }
         s->real_sprite_warping_points= s->num_sprite_warping_points;
-
-//printf("%d %d %d %d\n", d[0][0], d[0][1], s->sprite_offset[0][0], s->sprite_offset[0][1]);
+    }
+#if 0
+printf("vop:%d:%d %d:%d %d:%d, sprite:%d:%d %d:%d %d:%d, virtual: %d:%d %d:%d\n",
+    vop_ref[0][0], vop_ref[0][1],
+    vop_ref[1][0], vop_ref[1][1],
+    vop_ref[2][0], vop_ref[2][1],
+    sprite_ref[0][0], sprite_ref[0][1], 
+    sprite_ref[1][0], sprite_ref[1][1], 
+    sprite_ref[2][0], sprite_ref[2][1], 
+    virtual_ref[0][0], virtual_ref[0][1], 
+    virtual_ref[1][0], virtual_ref[1][1]
+    );
+    
+printf("offset: %d:%d , delta: %d %d %d %d, shift %d\n",
+    s->sprite_offset[0][0], s->sprite_offset[0][1],
+    s->sprite_delta[0][0], s->sprite_delta[0][1],
+    s->sprite_delta[1][0], s->sprite_delta[1][1],
+    s->sprite_shift[0]
+    );
+#endif
 }
 
-/* decode mpeg4 VOP header */
-int mpeg4_decode_picture_header(MpegEncContext * s)
-{
-    int time_incr, startcode, state, v;
-    int time_increment;
+static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
+    int width, height, vo_ver_id;
 
- redo:
-    /* search next start code */
-    align_get_bits(&s->gb);
-    state = 0xff;
-    for(;;) {
-        v = get_bits(&s->gb, 8);
-        if (state == 0x000001) {
-            state = ((state << 8) | v) & 0xffffff;
-            startcode = state;
-            break;
-        }
-        state = ((state << 8) | v) & 0xffffff;
-        if( get_bits_count(&s->gb) > s->gb.size*8-32){
-            if(s->gb.size>50){
-                printf("no VOP startcode found, frame size was=%d\n", s->gb.size);
-                return -1;
-            }else{
-                printf("frame skip\n");
-                return FRAME_SKIPED;
-            }
-        }
+    /* vol header */
+    skip_bits(gb, 1); /* random access */
+    s->vo_type= get_bits(gb, 8);
+    if (get_bits1(gb) != 0) { /* is_ol_id */
+        vo_ver_id = get_bits(gb, 4); /* vo_ver_id */
+        skip_bits(gb, 3); /* vo_priority */
+    } else {
+        vo_ver_id = 1;
+    }
+//printf("vo type:%d\n",s->vo_type);
+    s->aspect_ratio_info= get_bits(gb, 4);
+    if(s->aspect_ratio_info == FF_ASPECT_EXTENDED){        
+        s->aspected_width = get_bits(gb, 8); // par_width
+        s->aspected_height = get_bits(gb, 8); // par_height
     }
-//printf("startcode %X %d\n", startcode, get_bits_count(&s->gb));
-    if (startcode == 0x120) { // Video Object Layer
-        int width, height, vo_ver_id;
 
-        /* vol header */
-        skip_bits(&s->gb, 1); /* random access */
-        s->vo_type= get_bits(&s->gb, 8);
-        if (get_bits1(&s->gb) != 0) { /* is_ol_id */
-            vo_ver_id = get_bits(&s->gb, 4); /* vo_ver_id */
-            skip_bits(&s->gb, 3); /* vo_priority */
-        } else {
-            vo_ver_id = 1;
+    if ((s->vol_control_parameters=get_bits1(gb))) { /* vol control parameter */
+        int chroma_format= get_bits(gb, 2);
+        if(chroma_format!=1){
+            printf("illegal chroma format\n");
         }
-//printf("vo type:%d\n",s->vo_type);
-        s->aspect_ratio_info= get_bits(&s->gb, 4);
-       if(s->aspect_ratio_info == EXTENDED_PAR){
-            skip_bits(&s->gb, 8); //par_width
-            skip_bits(&s->gb, 8); // par_height
+        s->low_delay= get_bits1(gb);
+        if(get_bits1(gb)){ /* vbv parameters */
+            get_bits(gb, 15);  /* first_half_bitrate */
+            skip_bits1(gb);    /* marker */
+            get_bits(gb, 15);  /* latter_half_bitrate */
+            skip_bits1(gb);    /* marker */
+            get_bits(gb, 15);  /* first_half_vbv_buffer_size */
+            skip_bits1(gb);    /* marker */
+            get_bits(gb, 3);   /* latter_half_vbv_buffer_size */
+            get_bits(gb, 11);  /* first_half_vbv_occupancy */
+            skip_bits1(gb);    /* marker */
+            get_bits(gb, 15);  /* latter_half_vbv_occupancy */
+            skip_bits1(gb);    /* marker */               
         }
+    }else{
+        // set low delay flag only once so the smart? low delay detection won't be overridden
+        if(s->picture_number==0)
+            s->low_delay=0;
+    }
 
-        if ((s->vol_control_parameters=get_bits1(&s->gb))) { /* vol control parameter */
-            int chroma_format= get_bits(&s->gb, 2);
-            if(chroma_format!=1){
-                printf("illegal chroma format\n");
-            }
-            s->low_delay= get_bits1(&s->gb);
-            if(get_bits1(&s->gb)){ /* vbv parameters */
-                printf("vbv parameters not supported\n");
-                return -1;
+    s->shape = get_bits(gb, 2); /* vol shape */
+    if(s->shape != RECT_SHAPE) printf("only rectangular vol supported\n");
+    if(s->shape == GRAY_SHAPE && vo_ver_id != 1){
+        printf("Gray shape not supported\n");
+        skip_bits(gb, 4);  //video_object_layer_shape_extension
+    }
+
+    skip_bits1(gb);   /* marker */
+    
+    s->time_increment_resolution = get_bits(gb, 16);
+    
+    s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1;
+    if (s->time_increment_bits < 1)
+        s->time_increment_bits = 1;
+    skip_bits1(gb);   /* marker */
+
+    if (get_bits1(gb) != 0) {   /* fixed_vop_rate  */
+        skip_bits(gb, s->time_increment_bits);
+    }
+
+    if (s->shape != BIN_ONLY_SHAPE) {
+        if (s->shape == RECT_SHAPE) {
+            skip_bits1(gb);   /* marker */
+            width = get_bits(gb, 13);
+            skip_bits1(gb);   /* marker */
+            height = get_bits(gb, 13);
+            skip_bits1(gb);   /* marker */
+            if(width && height){ /* they should be non zero but who knows ... */
+                s->width = width;
+                s->height = height;
+//                printf("width/height: %d %d\n", width, height);
             }
-        }else{
-            // set low delay flag only once so the smart? low delay detection wont be overriden
-            if(s->picture_number==0)
-                s->low_delay=0;
         }
-
-        s->shape = get_bits(&s->gb, 2); /* vol shape */
-        if(s->shape != RECT_SHAPE) printf("only rectangular vol supported\n");
-        if(s->shape == GRAY_SHAPE && vo_ver_id != 1){
-            printf("Gray shape not supported\n");
-            skip_bits(&s->gb, 4);  //video_object_layer_shape_extension
+        
+        s->progressive_sequence= get_bits1(gb)^1;
+        if(!get_bits1(gb)) printf("OBMC not supported (very likely buggy encoder)\n");   /* OBMC Disable */
+        if (vo_ver_id == 1) {
+            s->vol_sprite_usage = get_bits1(gb); /* vol_sprite_usage */
+        } else {
+            s->vol_sprite_usage = get_bits(gb, 2); /* vol_sprite_usage */
         }
-
-        skip_bits1(&s->gb);   /* marker */
+        if(s->vol_sprite_usage==STATIC_SPRITE) printf("Static Sprites not supported\n");
+        if(s->vol_sprite_usage==STATIC_SPRITE || s->vol_sprite_usage==GMC_SPRITE){
+            if(s->vol_sprite_usage==STATIC_SPRITE){
+                s->sprite_width = get_bits(gb, 13);
+                skip_bits1(gb); /* marker */
+                s->sprite_height= get_bits(gb, 13);
+                skip_bits1(gb); /* marker */
+                s->sprite_left  = get_bits(gb, 13);
+                skip_bits1(gb); /* marker */
+                s->sprite_top   = get_bits(gb, 13);
+                skip_bits1(gb); /* marker */
+            }
+            s->num_sprite_warping_points= get_bits(gb, 6);
+            s->sprite_warping_accuracy = get_bits(gb, 2);
+            s->sprite_brightness_change= get_bits1(gb);
+            if(s->vol_sprite_usage==STATIC_SPRITE)
+                s->low_latency_sprite= get_bits1(gb);            
+        }
+        // FIXME sadct disable bit if verid!=1 && shape not rect
         
-        s->time_increment_resolution = get_bits(&s->gb, 16);
-        s->time_increment_bits = av_log2(s->time_increment_resolution - 1) + 1;
-        if (s->time_increment_bits < 1)
-            s->time_increment_bits = 1;
-        skip_bits1(&s->gb);   /* marker */
-
-        if (get_bits1(&s->gb) != 0) {   /* fixed_vop_rate  */
-            skip_bits(&s->gb, s->time_increment_bits);
+        if (get_bits1(gb) == 1) {   /* not_8_bit */
+            s->quant_precision = get_bits(gb, 4); /* quant_precision */
+            if(get_bits(gb, 4)!=8) printf("N-bit not supported\n"); /* bits_per_pixel */
+            if(s->quant_precision!=5) printf("quant precission %d\n", s->quant_precision);
+        } else {
+            s->quant_precision = 5;
         }
+        
+        // FIXME a bunch of grayscale shape things
 
-        if (s->shape != BIN_ONLY_SHAPE) {
-            if (s->shape == RECT_SHAPE) {
-                skip_bits1(&s->gb);   /* marker */
-                width = get_bits(&s->gb, 13);
-                skip_bits1(&s->gb);   /* marker */
-                height = get_bits(&s->gb, 13);
-                skip_bits1(&s->gb);   /* marker */
-                if(width && height){ /* they should be non zero but who knows ... */
-                    s->width = width;
-                    s->height = height;
-//                    printf("width/height: %d %d\n", width, height);
-                }
-            }
-            
-            s->progressive_sequence= get_bits1(&s->gb)^1;
-            if(!get_bits1(&s->gb)) printf("OBMC not supported (very likely buggy encoder)\n");   /* OBMC Disable */
-            if (vo_ver_id == 1) {
-                s->vol_sprite_usage = get_bits1(&s->gb); /* vol_sprite_usage */
-            } else {
-                s->vol_sprite_usage = get_bits(&s->gb, 2); /* vol_sprite_usage */
-            }
-            if(s->vol_sprite_usage==STATIC_SPRITE) printf("Static Sprites not supported\n");
-            if(s->vol_sprite_usage==STATIC_SPRITE || s->vol_sprite_usage==GMC_SPRITE){
-                if(s->vol_sprite_usage==STATIC_SPRITE){
-                    s->sprite_width = get_bits(&s->gb, 13);
-                    skip_bits1(&s->gb); /* marker */
-                    s->sprite_height= get_bits(&s->gb, 13);
-                    skip_bits1(&s->gb); /* marker */
-                    s->sprite_left  = get_bits(&s->gb, 13);
-                    skip_bits1(&s->gb); /* marker */
-                    s->sprite_top   = get_bits(&s->gb, 13);
-                    skip_bits1(&s->gb); /* marker */
-                }
-                s->num_sprite_warping_points= get_bits(&s->gb, 6);
-                s->sprite_warping_accuracy = get_bits(&s->gb, 2);
-                s->sprite_brightness_change= get_bits1(&s->gb);
-                if(s->vol_sprite_usage==STATIC_SPRITE)
-                    s->low_latency_sprite= get_bits1(&s->gb);            
-            }
-            // FIXME sadct disable bit if verid!=1 && shape not rect
+        if((s->mpeg_quant=get_bits1(gb))){ /* vol_quant_type */
+            int i, j, v;
             
-            if (get_bits1(&s->gb) == 1) {   /* not_8_bit */
-                s->quant_precision = get_bits(&s->gb, 4); /* quant_precision */
-                if(get_bits(&s->gb, 4)!=8) printf("N-bit not supported\n"); /* bits_per_pixel */
-                if(s->quant_precision!=5) printf("quant precission %d\n", s->quant_precision);
-            } else {
-                s->quant_precision = 5;
+            /* load default matrixes */
+            for(i=0; i<64; i++){
+                int j= s->idct_permutation[i];
+                v= ff_mpeg4_default_intra_matrix[i];
+                s->intra_matrix[j]= v;
+                s->chroma_intra_matrix[j]= v;
+                
+                v= ff_mpeg4_default_non_intra_matrix[i];
+                s->inter_matrix[j]= v;
+                s->chroma_inter_matrix[j]= v;
             }
-            
-            // FIXME a bunch of grayscale shape things
 
-            if((s->mpeg_quant=get_bits1(&s->gb))){ /* vol_quant_type */
-                int i, j, v;
-                
-                /* load default matrixes */
+            /* load custom intra matrix */
+            if(get_bits1(gb)){
+                int last=0;
                 for(i=0; i<64; i++){
-                    v= ff_mpeg4_default_intra_matrix[i];
-                    s->intra_matrix[i]= v;
-                    s->chroma_intra_matrix[i]= v;
+                    v= get_bits(gb, 8);
+                    if(v==0) break;
                     
-                    v= ff_mpeg4_default_non_intra_matrix[i];
-                    s->inter_matrix[i]= v;
-                    s->chroma_inter_matrix[i]= v;
+                    last= v;
+                    j= s->idct_permutation[ ff_zigzag_direct[i] ];
+                    s->intra_matrix[j]= v;
+                    s->chroma_intra_matrix[j]= v;
                 }
 
-                /* load custom intra matrix */
-                if(get_bits1(&s->gb)){
-                    for(i=0; i<64; i++){
-                        v= get_bits(&s->gb, 8);
-                        if(v==0) break;
-
-                        j= zigzag_direct[i];
-                        s->intra_matrix[j]= v;
-                        s->chroma_intra_matrix[j]= v;
-                    }
+                /* replicate last nonzero value; v is 0 here, the read loop above only exits early on a zero byte */
+                for(; i<64; i++){
+                    j= s->idct_permutation[ ff_zigzag_direct[i] ];
+                    s->intra_matrix[j]= last;
+                    s->chroma_intra_matrix[j]= last;
                 }
+            }
 
-                /* load custom non intra matrix */
-                if(get_bits1(&s->gb)){
-                    for(i=0; i<64; i++){
-                        v= get_bits(&s->gb, 8);
-                        if(v==0) break;
-
-                        j= zigzag_direct[i];
-                        s->inter_matrix[j]= v;
-                        s->chroma_inter_matrix[j]= v;
-                    }
+            /* load custom non intra matrix */
+            if(get_bits1(gb)){
+                int last=0;
+                for(i=0; i<64; i++){
+                    v= get_bits(gb, 8);
+                    if(v==0) break;
 
-                    /* replicate last value */
-                    for(; i<64; i++){
-                        j= zigzag_direct[i];
-                        s->inter_matrix[j]= v;
-                        s->chroma_inter_matrix[j]= v;
-                    }
+                    last= v;
+                    j= s->idct_permutation[ ff_zigzag_direct[i] ];
+                    s->inter_matrix[j]= v;
+                    s->chroma_inter_matrix[j]= v;
                 }
 
-                // FIXME a bunch of grayscale shape things
+                /* replicate last value */
+                for(; i<64; i++){
+                    j= s->idct_permutation[ ff_zigzag_direct[i] ];
+                    s->inter_matrix[j]= last;
+                    s->chroma_inter_matrix[j]= last;
+                }
             }
 
-            if(vo_ver_id != 1)
-                 s->quarter_sample= get_bits1(&s->gb);
-            else s->quarter_sample=0;
+            // FIXME a bunch of grayscale shape things
+        }
+
+        if(vo_ver_id != 1)
+             s->quarter_sample= get_bits1(gb);
+        else s->quarter_sample=0;
 
-            if(!get_bits1(&s->gb)) printf("Complexity estimation not supported\n");
+        if(!get_bits1(gb)) printf("Complexity estimation not supported\n");
 
-            s->resync_marker= !get_bits1(&s->gb); /* resync_marker_disabled */
+        s->resync_marker= !get_bits1(gb); /* resync_marker_disabled */
 
-            s->data_partitioning= get_bits1(&s->gb);
-            if(s->data_partitioning){
-                s->rvlc= get_bits1(&s->gb);
-                if(s->rvlc){
-                    printf("reversible vlc not supported\n");
-                }
-            }
-            
-            if(vo_ver_id != 1) {
-                s->new_pred= get_bits1(&s->gb);
-                if(s->new_pred){
-                    printf("new pred not supported\n");
-                    skip_bits(&s->gb, 2); /* requested upstream message type */
-                    skip_bits1(&s->gb); /* newpred segment type */
-                }
-                s->reduced_res_vop= get_bits1(&s->gb);
-                if(s->reduced_res_vop) printf("reduced resolution VOP not supported\n");
+        s->data_partitioning= get_bits1(gb);
+        if(s->data_partitioning){
+            s->rvlc= get_bits1(gb);
+            if(s->rvlc){
+                printf("reversible vlc not supported\n");
             }
-            else{
-                s->new_pred=0;
-                s->reduced_res_vop= 0;
+        }
+        
+        if(vo_ver_id != 1) {
+            s->new_pred= get_bits1(gb);
+            if(s->new_pred){
+                printf("new pred not supported\n");
+                skip_bits(gb, 2); /* requested upstream message type */
+                skip_bits1(gb); /* newpred segment type */
             }
+            s->reduced_res_vop= get_bits1(gb);
+            if(s->reduced_res_vop) printf("reduced resolution VOP not supported\n");
+        }
+        else{
+            s->new_pred=0;
+            s->reduced_res_vop= 0;
+        }
 
-            s->scalability= get_bits1(&s->gb);
-            if(s->workaround_bugs==1) s->scalability=0;
-            if (s->scalability) {
-                int dummy= s->hierachy_type= get_bits1(&s->gb);
-                int ref_layer_id= get_bits(&s->gb, 4);
-                int ref_layer_sampling_dir= get_bits1(&s->gb);
-                int h_sampling_factor_n= get_bits(&s->gb, 5);
-                int h_sampling_factor_m= get_bits(&s->gb, 5);
-                int v_sampling_factor_n= get_bits(&s->gb, 5);
-                int v_sampling_factor_m= get_bits(&s->gb, 5);
-                s->enhancement_type= get_bits1(&s->gb);
-                // bin shape stuff FIXME
+        s->scalability= get_bits1(gb);
+
+        if (s->scalability) {
+            GetBitContext bak= *gb;
+            int ref_layer_id;
+            int ref_layer_sampling_dir;
+            int h_sampling_factor_n;
+            int h_sampling_factor_m;
+            int v_sampling_factor_n;
+            int v_sampling_factor_m;
+            
+            s->hierachy_type= get_bits1(gb);
+            ref_layer_id= get_bits(gb, 4);
+            ref_layer_sampling_dir= get_bits1(gb);
+            h_sampling_factor_n= get_bits(gb, 5);
+            h_sampling_factor_m= get_bits(gb, 5);
+            v_sampling_factor_n= get_bits(gb, 5);
+            v_sampling_factor_m= get_bits(gb, 5);
+            s->enhancement_type= get_bits1(gb);
+            
+            if(   h_sampling_factor_n==0 || h_sampling_factor_m==0 
+               || v_sampling_factor_n==0 || v_sampling_factor_m==0){
+               
+//                fprintf(stderr, "illegal scalability header (VERY broken encoder), trying to workaround\n");
+                s->scalability=0;
+               
+                *gb= bak;
+            }else
                 printf("scalability not supported\n");
-            }
+            
+            // bin shape stuff FIXME
         }
-//printf("end Data %X %d\n", show_bits(&s->gb, 32), get_bits_count(&s->gb)&0x7);
-        goto redo;
-    } else if (startcode == 0x1b2) { //userdata
-        char buf[256];
-        int i;
-        int e;
-        int ver, build;
-
-//printf("user Data %X\n", show_bits(&s->gb, 32));
-        buf[0]= show_bits(&s->gb, 8);
-        for(i=1; i<256; i++){
-            buf[i]= show_bits(&s->gb, 16)&0xFF;
-            if(buf[i]==0) break;
-            skip_bits(&s->gb, 8);
+    }
+    return 0;
+}
+
+static int decode_user_data(MpegEncContext *s, GetBitContext *gb){
+    char buf[256];
+    int i;
+    int e;
+    int ver, build, ver2, ver3;
+    /* Collect up to 255 user-data bytes into buf, then match it against known encoder signatures. */
+    buf[0]= show_bits(gb, 8);
+    for(i=1; i<256; i++){
+        buf[i]= show_bits(gb, 16)&0xFF; /* low byte of a 16 bit read; presumably a peek at the byte after the current one - confirm show_bits() */
+        if(buf[i]==0) break; /* stop at the first zero byte, before skipping any further */
+        skip_bits(gb, 8);
+    }
+    buf[255]=0; /* force termination when no zero byte was found */
+    
+    /* divx detection: accepts "DivX<ver>Build<build>" or "DivX<ver>b<build>" */
+    e=sscanf(buf, "DivX%dBuild%d", &ver, &build);
+    if(e!=2)
+        e=sscanf(buf, "DivX%db%d", &ver, &build);
+    if(e==2){
+        s->divx_version= ver;
+        s->divx_build= build;
+        if(s->picture_number==0){
+            printf("This file was encoded with DivX%d Build%d\n", ver, build);
         }
-        buf[255]=0;
-        e=sscanf(buf, "DivX%dBuild%d", &ver, &build);
-        if(e!=2)
-            e=sscanf(buf, "DivX%db%d", &ver, &build);
-        if(e==2){
-            s->divx_version= ver;
-            s->divx_build= build;
-            if(s->picture_number==0){
-                printf("This file was encoded with DivX%d Build%d\n", ver, build);
-                if(ver==500 && build==413){
-                    printf("WARNING: this version of DivX is not MPEG4 compatible, trying to workaround these bugs...\n");
-                }
-            }
+    }
+    
+    /* ffmpeg detection: two versioned formats; a bare "ffmpeg" string is mapped to version 0x000406, build 4600 */
+    e=sscanf(buf, "FFmpeg%d.%d.%db%d", &ver, &ver2, &ver3, &build);
+    if(e!=4)
+        e=sscanf(buf, "FFmpeg v%d.%d.%d / libavcodec build: %d", &ver, &ver2, &ver3, &build); 
+    if(e!=4){
+        if(strcmp(buf, "ffmpeg")==0){
+            s->ffmpeg_version= 0x000406;
+            s->lavc_build= 4600;
         }
-//printf("User Data: %s\n", buf);
-        goto redo;
-    } else if (startcode != 0x1b6) { //VOP
-        goto redo;
     }
+    if(e==4){
+        s->ffmpeg_version= ver*256*256 + ver2*256 + ver3;
+        s->lavc_build= build;
+        if(s->picture_number==0)
+            printf("This file was encoded with libavcodec build %d\n", build);
+    }
+    
+    /* xvid detection: "XviD<build>" */
+    e=sscanf(buf, "XviD%d", &build);
+    if(e==1){
+        s->xvid_build= build;
+        if(s->picture_number==0)
+            printf("This file was encoded with XviD build %d\n", build);
+    }
+
+//printf("User Data: %s\n", buf);
+    return 0;
+}
 
-    s->pict_type = get_bits(&s->gb, 2) + I_TYPE;       /* pict type: I = 0 , P = 1 */
-//if(s->pict_type!=I_TYPE) return FRAME_SKIPED;
+static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
+    int time_incr, time_increment;
+
+    s->pict_type = get_bits(gb, 2) + I_TYPE;   /* pict type: I = 0 , P = 1 */
     if(s->pict_type==B_TYPE && s->low_delay && s->vol_control_parameters==0){
         printf("low_delay flag set, but shouldnt, clearing it\n");
         s->low_delay=0;
     }
-// printf("pic: %d, qpel:%d part:%d resync:%d\n", s->pict_type, s->quarter_sample, s->data_partitioning, s->resync_marker); 
+    s->partitioned_frame= s->data_partitioning && s->pict_type!=B_TYPE;
+    if(s->partitioned_frame)
+        s->decode_mb= mpeg4_decode_partitioned_mb;
+    else
+        s->decode_mb= ff_h263_decode_mb;
+
+    if(s->time_increment_resolution==0){
+        s->time_increment_resolution=1;
+//        fprintf(stderr, "time_increment_resolution is illegal\n");
+    }
     time_incr=0;
-    while (get_bits1(&s->gb) != 0) 
+    while (get_bits1(gb) != 0) 
         time_incr++;
 
-    check_marker(&s->gb, "before time_increment");
-    time_increment= get_bits(&s->gb, s->time_increment_bits);
-//printf(" type:%d incr:%d increment:%d\n", s->pict_type, time_incr, time_increment);
+    check_marker(gb, "before time_increment");
+    time_increment= get_bits(gb, s->time_increment_bits);
+//printf(" type:%d modulo_time_base:%d increment:%d\n", s->pict_type, time_incr, time_increment);
     if(s->pict_type!=B_TYPE){
         s->last_time_base= s->time_base;
         s->time_base+= time_incr;
         s->time= s->time_base*s->time_increment_resolution + time_increment;
+        if(s->workaround_bugs&FF_BUG_UMP4){
+            if(s->time < s->last_non_b_time){
+//                fprintf(stderr, "header is not mpeg4 compatible, broken encoder, trying to workaround\n");
+                s->time_base++;
+                s->time+= s->time_increment_resolution;
+            }
+        }
         s->pp_time= s->time - s->last_non_b_time;
         s->last_non_b_time= s->time;
     }else{
         s->time= (s->last_time_base + time_incr)*s->time_increment_resolution + time_increment;
-        s->bp_time= s->last_non_b_time - s->time;
-        if(s->pp_time <=s->bp_time){
+        s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
+        if(s->pp_time <=s->pb_time || s->pp_time <= s->pp_time - s->pb_time || s->pp_time<=0){
 //            printf("messed up order, seeking?, skiping current b frame\n");
             return FRAME_SKIPED;
         }
+        
+        if(s->t_frame==0) s->t_frame= s->time - s->last_time_base;
+        if(s->t_frame==0) s->t_frame=1; // 1/0 protection
+//printf("%Ld %Ld %d %d\n", s->last_non_b_time, s->time, s->pp_time, s->t_frame); fflush(stdout);
+        s->pp_field_time= (  ROUNDED_DIV(s->last_non_b_time, s->t_frame) 
+                           - ROUNDED_DIV(s->last_non_b_time - s->pp_time, s->t_frame))*2;
+        s->pb_field_time= (  ROUNDED_DIV(s->time, s->t_frame) 
+                           - ROUNDED_DIV(s->last_non_b_time - s->pp_time, s->t_frame))*2;
     }
-
-    if(check_marker(&s->gb, "before vop_coded")==0 && s->picture_number==0){
+    
+    s->avctx->pts= s->time*1000LL*1000LL / s->time_increment_resolution;
+    
+    if(check_marker(gb, "before vop_coded")==0 && s->picture_number==0){
         printf("hmm, seems the headers arnt complete, trying to guess time_increment_bits\n");
         for(s->time_increment_bits++ ;s->time_increment_bits<16; s->time_increment_bits++){
-            if(get_bits1(&s->gb)) break;
+            if(get_bits1(gb)) break;
         }
         printf("my guess is %d bits ;)\n",s->time_increment_bits);
     }
     /* vop coded */
-    if (get_bits1(&s->gb) != 1)
-        goto redo;
-//printf("time %d %d %d || %d %d %d\n", s->time_increment_bits, s->time_increment, s->time_base,
-//s->time, s->last_non_b_time[0], s->last_non_b_time[1]);  
+    if (get_bits1(gb) != 1){
+        printf("vop not coded\n");
+        return FRAME_SKIPED;
+    }
+//printf("time %d %d %d || %Ld %Ld %Ld\n", s->time_increment_bits, s->time_increment_resolution, s->time_base,
+//s->time, s->last_non_b_time, s->last_non_b_time - s->pp_time);  
     if (s->shape != BIN_ONLY_SHAPE && ( s->pict_type == P_TYPE
                           || (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE))) {
         /* rounding type for motion estimation */
-       s->no_rounding = get_bits1(&s->gb);
+       s->no_rounding = get_bits1(gb);
     } else {
        s->no_rounding = 0;
     }
@@ -4019,53 +4463,60 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
          if (s->vol_sprite_usage != 1 || s->pict_type != I_TYPE) {
              int width, height, hor_spat_ref, ver_spat_ref;
  
-             width = get_bits(&s->gb, 13);
-             skip_bits1(&s->gb);   /* marker */
-             height = get_bits(&s->gb, 13);
-             skip_bits1(&s->gb);   /* marker */
-             hor_spat_ref = get_bits(&s->gb, 13); /* hor_spat_ref */
-             skip_bits1(&s->gb);   /* marker */
-             ver_spat_ref = get_bits(&s->gb, 13); /* ver_spat_ref */
+             width = get_bits(gb, 13);
+             skip_bits1(gb);   /* marker */
+             height = get_bits(gb, 13);
+             skip_bits1(gb);   /* marker */
+             hor_spat_ref = get_bits(gb, 13); /* hor_spat_ref */
+             skip_bits1(gb);   /* marker */
+             ver_spat_ref = get_bits(gb, 13); /* ver_spat_ref */
          }
-         skip_bits1(&s->gb); /* change_CR_disable */
+         skip_bits1(gb); /* change_CR_disable */
  
-         if (get_bits1(&s->gb) != 0) {
-             skip_bits(&s->gb, 8); /* constant_alpha_value */
+         if (get_bits1(gb) != 0) {
+             skip_bits(gb, 8); /* constant_alpha_value */
          }
      }
 //FIXME complexity estimation stuff
      
      if (s->shape != BIN_ONLY_SHAPE) {
          int t;
-         t=get_bits(&s->gb, 3); /* intra dc VLC threshold */
+         t=get_bits(gb, 3); /* intra dc VLC threshold */
 //printf("threshold %d\n", t);
          if(!s->progressive_sequence){
-             s->top_field_first= get_bits1(&s->gb);
-             s->alternate_scan= get_bits1(&s->gb);
-//printf("top:%d alt:%d\n", s->top_field_first, s->alternate_scan);
-         }
+             s->top_field_first= get_bits1(gb);
+             s->alternate_scan= get_bits1(gb);
+         }else
+             s->alternate_scan= 0;
      }
 
+     if(s->alternate_scan){
+         ff_init_scantable(s, &s->inter_scantable  , ff_alternate_vertical_scan);
+         ff_init_scantable(s, &s->intra_scantable  , ff_alternate_vertical_scan);
+         ff_init_scantable(s, &s->intra_h_scantable, ff_alternate_vertical_scan);
+         ff_init_scantable(s, &s->intra_v_scantable, ff_alternate_vertical_scan);
+     } else{
+         ff_init_scantable(s, &s->inter_scantable  , ff_zigzag_direct);
+         ff_init_scantable(s, &s->intra_scantable  , ff_zigzag_direct);
+         ff_init_scantable(s, &s->intra_h_scantable, ff_alternate_horizontal_scan);
+         ff_init_scantable(s, &s->intra_v_scantable, ff_alternate_vertical_scan);
+     }
      if(s->pict_type == S_TYPE && (s->vol_sprite_usage==STATIC_SPRITE || s->vol_sprite_usage==GMC_SPRITE)){
-         if(s->num_sprite_warping_points){
-             mpeg4_decode_sprite_trajectory(s);
-         }
+         mpeg4_decode_sprite_trajectory(s);
          if(s->sprite_brightness_change) printf("sprite_brightness_change not supported\n");
          if(s->vol_sprite_usage==STATIC_SPRITE) printf("static sprite not supported\n");
      }
 
      if (s->shape != BIN_ONLY_SHAPE) {
-         /* note: we do not use quant_precision to avoid problem if no
-            MPEG4 vol header as it is found on some old opendivx
-            movies */
-         s->qscale = get_bits(&s->gb, 5);
+         s->qscale = get_bits(gb, s->quant_precision);
          if(s->qscale==0){
              printf("Error, header damaged or not MPEG4 header (qscale=0)\n");
              return -1; // makes no sense to continue, as there is nothing left from the image then
          }
   
          if (s->pict_type != I_TYPE) {
-             s->f_code = get_bits(&s->gb, 3);  /* fcode_for */
+             s->f_code = get_bits(gb, 3);      /* fcode_for */
              if(s->f_code==0){
                  printf("Error, header damaged or not MPEG4 header (f_code=0)\n");
                  return -1; // makes no sense to continue, as the MV decoding will break very quickly
@@ -4074,23 +4525,29 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
              s->f_code=1;
      
          if (s->pict_type == B_TYPE) {
-             s->b_code = get_bits(&s->gb, 3);
+             s->b_code = get_bits(gb, 3);
          }else
              s->b_code=1;
-
-//printf("quant:%d fcode:%d bcode:%d type:%d size:%d\n", s->qscale, s->f_code, s->b_code, s->pict_type, s->gb.size);
+#if 0
+printf("qp:%d fc:%d bc:%d type:%s size:%d pro:%d alt:%d top:%d qpel:%d part:%d resync:%d w:%d a:%d\n", 
+    s->qscale, s->f_code, s->b_code, 
+    s->pict_type == I_TYPE ? "I" : (s->pict_type == P_TYPE ? "P" : (s->pict_type == B_TYPE ? "B" : "S")), 
+    gb->size,s->progressive_sequence, s->alternate_scan, s->top_field_first, 
+    s->quarter_sample, s->data_partitioning, s->resync_marker, s->num_sprite_warping_points,
+    s->sprite_warping_accuracy); 
+#endif
          if(!s->scalability){
              if (s->shape!=RECT_SHAPE && s->pict_type!=I_TYPE) {
-                 skip_bits1(&s->gb); // vop shape coding type
+                 skip_bits1(gb); // vop shape coding type
              }
          }else{
              if(s->enhancement_type){
-                 int load_backward_shape= get_bits1(&s->gb);
+                 int load_backward_shape= get_bits1(gb);
                  if(load_backward_shape){
                      printf("load backward shape isnt supported\n");
                  }
              }
-             skip_bits(&s->gb, 2); //ref_select_code
+             skip_bits(gb, 2); //ref_select_code
          }
      }
      /* detect buggy encoders which dont set the low_delay flag (divx4/xvid/opendivx)*/
@@ -4112,6 +4569,53 @@ int mpeg4_decode_picture_header(MpegEncContext * s)
      return 0;
 }
 
+/**
+ * Scan for MPEG-4 start codes and hand each recognized header to its parser.
+ * @return -1 if the data runs out before a VOP start code (0x1b6) is seen,
+ *         FRAME_SKIPED instead for a 1 byte buffer when s->divx_version is set,
+ *         otherwise whatever decode_vop_header() returns for the VOP found
+ */
+int ff_mpeg4_decode_picture_header(MpegEncContext * s, GetBitContext *gb)
+{
+    int startcode, v;
+
+    /* search next start code, shifting in one byte at a time */
+    align_get_bits(gb);
+    startcode = 0xff;
+    for(;;) {
+        v = get_bits(gb, 8);
+        startcode = ((startcode << 8) | v) & 0xffffffff;
+        
+        if(get_bits_count(gb) >= gb->size*8){
+            if(gb->size==1 && s->divx_version){
+                printf("frame skip %d\n", gb->size);
+                return FRAME_SKIPED; //divx bug
+            }else
+                return -1; //end of stream
+        }
+
+        if((startcode&0xFFFFFF00) != 0x100)
+            continue; //no startcode (upper 24 bits are not 0x000001)
+        
+        switch(startcode){
+        case 0x120: /* handled by decode_vol_header(), result ignored */
+            decode_vol_header(s, gb);
+            break;
+        case 0x1b2: /* handled by decode_user_data(), result ignored */
+            decode_user_data(s, gb);
+            break;
+        case 0x1b6: /* VOP: the only case that ends the search */
+            return decode_vop_header(s, gb);
+        default:
+//            printf("startcode %X found\n", startcode);
+            break;
+        }
+
+        align_get_bits(gb);
+        startcode = 0xff;
+    }
+}
+
 /* don't understand why they choose a different header ! */
 int intel_h263_decode_picture_header(MpegEncContext *s)
 {
@@ -4172,6 +4676,10 @@ int intel_h263_decode_picture_header(MpegEncContext *s)
         skip_bits(&s->gb, 8);
     }
     s->f_code = 1;
+
+    s->y_dc_scale_table=
+    s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
+
     return 0;
 }