]> git.sesse.net Git - ffmpeg/blobdiff - libavcodec/msmpeg4.c
moved the tables into header files (and applied the 'static' patch). Nick: why do...
[ffmpeg] / libavcodec / msmpeg4.c
index c53dc0d8aeb18cbe196eb4a78fd98502a1836323..839d8dbac4284424b2797a8030f430c043be8537 100644 (file)
  *
  * msmpeg4v1 & v2 stuff by Michael Niedermayer <michaelni@gmx.at>
  */
+
+/**
+ * @file msmpeg4.c
+ * MSMPEG4 backend for ffmpeg encoder and decoder.
+ */
+
 #include "avcodec.h"
 #include "dsputil.h"
 #include "mpegvideo.h"
-//#define PRINT_MB
 
 /*
  * You can also call this codec : MPEG4 with a twist ! 
 #define MB_NON_INTRA_VLC_BITS 9
 #define MB_INTRA_VLC_BITS 9
 
-static UINT32 v2_dc_lum_table[512][2];
-static UINT32 v2_dc_chroma_table[512][2];
+#define II_BITRATE 128*1024
+#define MBAC_BITRATE 50*1024
+
+#define DEFAULT_INTER_INDEX 3
+
+static uint32_t v2_dc_lum_table[512][2];
+static uint32_t v2_dc_chroma_table[512][2];
 
 static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n);
 static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
-                                       int n, int coded);
+                                       int n, int coded, const uint8_t *scantable);
 static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr);
 static int msmpeg4_decode_motion(MpegEncContext * s, 
                                  int *mx_ptr, int *my_ptr);
 static void msmpeg4v2_encode_motion(MpegEncContext * s, int val);
 static void init_h263_dc_for_msmpeg4(void);
 static inline void msmpeg4_memsetw(short *tab, int val, int n);
+#ifdef CONFIG_ENCODERS
+static int get_size_of_code(MpegEncContext * s, RLTable *rl, int last, int run, int level, int intra);
+#endif //CONFIG_ENCODERS
+static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
+static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
+static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
 
 
-
-extern UINT32 inverse[256];
-
 #ifdef DEBUG
 int intra_count = 0;
 int frame_count = 0;
@@ -69,6 +82,10 @@ int frame_count = 0;
 
 #include "msmpeg4data.h"
 
+#ifdef CONFIG_ENCODERS //strangely, gcc includes this even if it's not referenced
+static uint8_t rl_length[NB_RL_TABLES][MAX_LEVEL+1][MAX_RUN+1][2];
+#endif //CONFIG_ENCODERS
+
 #ifdef STATS
 
 const char *st_names[ST_NB] = {
@@ -153,47 +170,36 @@ static void common_init(MpegEncContext * s)
         }
         break;
     case 4:
+    case 5:
         s->y_dc_scale_table= wmv1_y_dc_scale_table;
         s->c_dc_scale_table= wmv1_c_dc_scale_table;
         break;
     }
 
-    if(s->msmpeg4_version==4){
-        s->intra_scantable  = wmv1_scantable[1];
-        s->intra_h_scantable= wmv1_scantable[2];
-        s->intra_v_scantable= wmv1_scantable[3];
-        s->inter_scantable  = wmv1_scantable[0];
-    }else{
-        s->intra_scantable  = zigzag_direct; 
-        s->intra_h_scantable= ff_alternate_horizontal_scan; 
-        s->intra_v_scantable= ff_alternate_vertical_scan; 
-        s->inter_scantable  = zigzag_direct; 
+    
+    if(s->msmpeg4_version>=4){
+        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , wmv1_scantable[1]);
+        ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, wmv1_scantable[2]);
+        ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, wmv1_scantable[3]);
+        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , wmv1_scantable[0]);
     }
+    //Note the default tables are set in common_init in mpegvideo.c
     
     if(!inited){
-        int i;
         inited=1;
 
         init_h263_dc_for_msmpeg4();
-
-        /* permute for IDCT */
-        for(i=0; i<WMV1_SCANTABLE_COUNT; i++){
-            int k;
-            for(k=0;k<64;k++) {
-                int j = wmv1_scantable[i][k];
-                wmv1_scantable[i][k]= block_permute_op(j);
-            }
-        }
-
     }
 }
 
+#ifdef CONFIG_ENCODERS
+
 /* build the table which associates an (x,y) motion vector with a vlc */
 static void init_mv_table(MVTable *tab)
 {
     int i, x, y;
 
-    tab->table_mv_index = av_malloc(sizeof(UINT16) * 4096);
+    tab->table_mv_index = av_malloc(sizeof(uint16_t) * 4096);
     /* mark all entries as not used */
     for(i=0;i<4096;i++)
         tab->table_mv_index[i] = tab->n;
@@ -233,6 +239,19 @@ void ff_msmpeg4_encode_init(MpegEncContext *s)
         init_mv_table(&mv_tables[1]);
         for(i=0;i<NB_RL_TABLES;i++)
             init_rl(&rl_table[i]);
+
+        for(i=0; i<NB_RL_TABLES; i++){
+            int level;
+            for(level=0; level<=MAX_LEVEL; level++){
+                int run;
+                for(run=0; run<=MAX_RUN; run++){
+                    int last;
+                    for(last=0; last<2; last++){
+                        rl_length[i][level][run][last]= get_size_of_code(s, &rl_table[  i], last, run, level, 0);
+                    }
+                }
+            }
+        }
     }
 }
 
@@ -282,8 +301,7 @@ static void find_best_tables(MpegEncContext * s)
     int i;
     int best       =-1, best_size       =9999999;
     int chroma_best=-1, best_chroma_size=9999999;
-    int last_size=0;
-    
+
     for(i=0; i<3; i++){
         int level;
         int chroma_size=0;
@@ -297,20 +315,22 @@ static void find_best_tables(MpegEncContext * s)
             int run;
             for(run=0; run<=MAX_RUN; run++){
                 int last;
+                const int last_size= size + chroma_size;
                 for(last=0; last<2; last++){
                     int inter_count       = s->ac_stats[0][0][level][run][last] + s->ac_stats[0][1][level][run][last];
                     int intra_luma_count  = s->ac_stats[1][0][level][run][last];
                     int intra_chroma_count= s->ac_stats[1][1][level][run][last];
-
+                    
                     if(s->pict_type==I_TYPE){
-                        size       += intra_luma_count  *get_size_of_code(s, &rl_table[  i], last, run, level,1);
-                        chroma_size+= intra_chroma_count*get_size_of_code(s, &rl_table[3+i], last, run, level,1);
+                        size       += intra_luma_count  *rl_length[i  ][level][run][last];
+                        chroma_size+= intra_chroma_count*rl_length[i+3][level][run][last];
                     }else{
-                        size+=        intra_luma_count  *get_size_of_code(s, &rl_table[  i], last, run, level,1)
-                                     +intra_chroma_count*get_size_of_code(s, &rl_table[3+i], last, run, level,1)
-                                     +inter_count       *get_size_of_code(s, &rl_table[3+i], last, run, level,0);
+                        size+=        intra_luma_count  *rl_length[i  ][level][run][last]
+                                     +intra_chroma_count*rl_length[i+3][level][run][last]
+                                     +inter_count       *rl_length[i+3][level][run][last];
                     }                   
                 }
+                if(last_size == size+chroma_size) break;
             }
         }
         if(size<best_size){
@@ -322,6 +342,7 @@ static void find_best_tables(MpegEncContext * s)
             chroma_best= i;
         }
     }
+
 //    printf("type:%d, best:%d, qp:%d, var:%d, mcvar:%d, size:%d //\n", 
 //           s->pict_type, best, s->qscale, s->mb_var_sum, s->mc_mb_var_sum, best_size);
            
@@ -360,16 +381,17 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
     s->mv_table_index = 1; /* only if P frame */
     s->use_skip_mb_code = 1; /* only if P frame */
     s->per_mb_rl_table = 0;
-    s->inter_intra_pred= (s->width*s->height < 320*240 && s->bit_rate<=128 && s->pict_type==P_TYPE);
+    if(s->msmpeg4_version==4)
+        s->inter_intra_pred= (s->width*s->height < 320*240 && s->bit_rate<=II_BITRATE && s->pict_type==P_TYPE);
+//printf("%d %d %d %d %d\n", s->pict_type, s->bit_rate, s->inter_intra_pred, s->width, s->height);
 
     if (s->pict_type == I_TYPE) {
-        s->no_rounding = 1;
         s->slice_height= s->mb_height/1;
         put_bits(&s->pb, 5, 0x16 + s->mb_height/s->slice_height);
         
         if(s->msmpeg4_version==4){
             msmpeg4_encode_ext_header(s);
-            if(s->bit_rate>50)
+            if(s->bit_rate>MBAC_BITRATE)
                 put_bits(&s->pb, 1, s->per_mb_rl_table);
         }
 
@@ -384,7 +406,7 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
     } else {
         put_bits(&s->pb, 1, s->use_skip_mb_code);
         
-        if(s->msmpeg4_version==4 && s->bit_rate>50)
+        if(s->msmpeg4_version==4 && s->bit_rate>MBAC_BITRATE)
             put_bits(&s->pb, 1, s->per_mb_rl_table);
 
         if(s->msmpeg4_version>2){
@@ -395,12 +417,6 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
 
             put_bits(&s->pb, 1, s->mv_table_index);
         }
-
-       if(s->flipflop_rounding){
-           s->no_rounding ^= 1;
-       }else{
-           s->no_rounding = 0;
-       }
     }
 
     s->esc3_level_length= 0;
@@ -414,20 +430,20 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
 
 void msmpeg4_encode_ext_header(MpegEncContext * s)
 {
-        put_bits(&s->pb, 5, s->frame_rate / FRAME_RATE_BASE); //yes 29.97 -> 29
+        put_bits(&s->pb, 5, s->avctx->frame_rate / s->avctx->frame_rate_base); //yes 29.97 -> 29
 
-        put_bits(&s->pb, 11, MIN(s->bit_rate, 2047));
+        put_bits(&s->pb, 11, FFMIN(s->bit_rate/1024, 2047));
 
-        if(s->msmpeg4_version<3)
-            s->flipflop_rounding=0;
-        else{
-            s->flipflop_rounding=1;
+        if(s->msmpeg4_version>=3)
             put_bits(&s->pb, 1, s->flipflop_rounding);
-        }
+        else
+            assert(s->flipflop_rounding==0);
 }
 
+#endif //CONFIG_ENCODERS
+
 /* predict coded block */
-static inline int coded_block_pred(MpegEncContext * s, int n, UINT8 **coded_block_ptr)
+static inline int coded_block_pred(MpegEncContext * s, int n, uint8_t **coded_block_ptr)
 {
     int xy, wrap, pred, a, b, c;
 
@@ -453,6 +469,8 @@ static inline int coded_block_pred(MpegEncContext * s, int n, UINT8 **coded_bloc
     return pred;
 }
 
+#ifdef CONFIG_ENCODERS
+
 static void msmpeg4_encode_motion(MpegEncContext * s, 
                                   int mx, int my)
 {
@@ -495,27 +513,8 @@ static void msmpeg4_encode_motion(MpegEncContext * s,
 static inline void handle_slices(MpegEncContext *s){
     if (s->mb_x == 0) {
         if (s->slice_height && (s->mb_y % s->slice_height) == 0) {
-            if(s->msmpeg4_version != 4){
-                int wrap;
-                /* reset DC pred (set previous line to 1024) */
-                wrap = 2 * s->mb_width + 2;
-                msmpeg4_memsetw(&s->dc_val[0][(1) + (2 * s->mb_y) * wrap],
-                                1024, 2 * s->mb_width);
-                wrap = s->mb_width + 2;
-                msmpeg4_memsetw(&s->dc_val[1][(1) + (s->mb_y) * wrap],
-                                1024, s->mb_width);
-                msmpeg4_memsetw(&s->dc_val[2][(1) + (s->mb_y) * wrap],
-                                1024, s->mb_width);
-
-                /* reset AC pred (set previous line to 0) */
-                wrap = s->mb_width * 2 + 2;
-                msmpeg4_memsetw(s->ac_val[0][0] + (1 + (2 * s->mb_y) * wrap)*16,
-                                0, 2 * s->mb_width*16);
-                wrap = s->mb_width + 2;
-                msmpeg4_memsetw(s->ac_val[1][0] + (1 + (s->mb_y) * wrap)*16,
-                                0, s->mb_width*16);
-                msmpeg4_memsetw(s->ac_val[2][0] + (1 + (s->mb_y) * wrap)*16,
-                                0, s->mb_width*16);
+            if(s->msmpeg4_version < 4){
+                ff_mpeg4_clean_buffers(s);
             }
             s->first_slice_line = 1;
         } else {
@@ -530,7 +529,7 @@ void msmpeg4_encode_mb(MpegEncContext * s,
 {
     int cbp, coded_cbp, i;
     int pred_x, pred_y;
-    UINT8 *coded_block;
+    uint8_t *coded_block;
 
     handle_slices(s);
     
@@ -545,6 +544,10 @@ void msmpeg4_encode_mb(MpegEncContext * s,
        if (s->use_skip_mb_code && (cbp | motion_x | motion_y) == 0) {
            /* skip macroblock */
            put_bits(&s->pb, 1, 1);
+            s->last_bits++;
+           s->misc_bits++;
+            s->skip_count++;
+
            return;
        }
         if (s->use_skip_mb_code)
@@ -560,7 +563,9 @@ void msmpeg4_encode_mb(MpegEncContext * s,
             put_bits(&s->pb, 
                      cbpy_tab[coded_cbp>>2][1], 
                      cbpy_tab[coded_cbp>>2][0]);
-                        
+
+            s->misc_bits += get_bits_diff(s);
+
             h263_pred_motion(s, 0, &pred_x, &pred_y);
             msmpeg4v2_encode_motion(s, motion_x - pred_x);
             msmpeg4v2_encode_motion(s, motion_y - pred_y);
@@ -569,11 +574,20 @@ void msmpeg4_encode_mb(MpegEncContext * s,
                      table_mb_non_intra[cbp + 64][1], 
                      table_mb_non_intra[cbp + 64][0]);
 
+            s->misc_bits += get_bits_diff(s);
+
             /* motion vector */
             h263_pred_motion(s, 0, &pred_x, &pred_y);
             msmpeg4_encode_motion(s, motion_x - pred_x, 
                                   motion_y - pred_y);
         }
+
+        s->mv_bits += get_bits_diff(s);
+
+        for (i = 0; i < 6; i++) {
+            msmpeg4_encode_block(s, block[i], i);
+        }
+        s->p_tex_bits += get_bits_diff(s);
     } else {
        /* compute cbp */
        cbp = 0;
@@ -624,16 +638,25 @@ void msmpeg4_encode_mb(MpegEncContext * s,
             }
             set_stat(ST_INTRA_MB);
             put_bits(&s->pb, 1, 0);    /* no AC prediction yet */
+            if(s->inter_intra_pred){
+                s->h263_aic_dir=0;
+                put_bits(&s->pb, table_inter_intra[s->h263_aic_dir][1], table_inter_intra[s->h263_aic_dir][0]);
+            }
         }
-    }
+        s->misc_bits += get_bits_diff(s);
 
-    for (i = 0; i < 6; i++) {
-        msmpeg4_encode_block(s, block[i], i);
+        for (i = 0; i < 6; i++) {
+            msmpeg4_encode_block(s, block[i], i);
+        }
+        s->i_tex_bits += get_bits_diff(s);
+        s->i_count++;
     }
 }
 
+#endif //CONFIG_ENCODERS
+
 /* old ffmpeg msmpeg4v3 mode */
-void ff_old_msmpeg4_dc_scale(MpegEncContext * s)
+static void ff_old_msmpeg4_dc_scale(MpegEncContext * s)
 {
     if (s->qscale < 5){
         s->y_dc_scale = 8;
@@ -648,7 +671,7 @@ void ff_old_msmpeg4_dc_scale(MpegEncContext * s)
 }
 
 static inline int msmpeg4v1_pred_dc(MpegEncContext * s, int n, 
-                                    INT32 **dc_val_ptr)
+                                    int32_t **dc_val_ptr)
 {
     int i;
 
@@ -672,15 +695,15 @@ static int get_dc(uint8_t *src, int stride, int scale)
             sum+=src[x + y*stride];
         }
     }
-    return (sum + (scale>>1))/scale;
+    return FASTDIV((sum + (scale>>1)), scale);
 }
 
 /* dir = 0: left, dir = 1: top prediction */
 static inline int msmpeg4_pred_dc(MpegEncContext * s, int n, 
-                             UINT16 **dc_val_ptr, int *dir_ptr)
+                             uint16_t **dc_val_ptr, int *dir_ptr)
 {
     int a, b, c, wrap, pred, scale;
-    INT16 *dc_val;
+    int16_t *dc_val;
 
     /* find prediction */
     if (n < 4) {
@@ -698,6 +721,10 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n,
     a = dc_val[ - 1];
     b = dc_val[ - 1 - wrap];
     c = dc_val[ - wrap];
+    
+    if(s->first_slice_line && (n&2)==0 && s->msmpeg4_version<4){
+        b=c=1024;
+    }
 
     /* XXX: the following solution costs divisions, but it does not
        require modifying mpegvideo.c. The problem comes from the
@@ -732,9 +759,9 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n,
        b = (b + (8 >> 1)) / 8;
        c = (c + (8 >> 1)) / 8;
     } else {
-       a = (a + (scale >> 1)) / scale;
-       b = (b + (scale >> 1)) / scale;
-       c = (c + (scale >> 1)) / scale;
+       a = FASTDIV((a + (scale >> 1)), scale);
+       b = FASTDIV((b + (scale >> 1)), scale);
+       c = FASTDIV((c + (scale >> 1)), scale);
     }
 #endif
     /* XXX: WARNING: they did not choose the same test as MPEG4. This
@@ -761,10 +788,10 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n,
             }else{
                 if(n<4){
                     wrap= s->linesize;
-                    dest= s->current_picture[0] + (((n>>1) + 2*s->mb_y) * 8*  wrap ) + ((n&1) + 2*s->mb_x) * 8;
+                    dest= s->current_picture.data[0] + (((n>>1) + 2*s->mb_y) * 8*  wrap ) + ((n&1) + 2*s->mb_x) * 8;
                 }else{
-                    wrap= s->linesize>>1;
-                    dest= s->current_picture[n-3] + (s->mb_y * 8 * wrap) + s->mb_x * 8;
+                    wrap= s->uvlinesize;
+                    dest= s->current_picture.data[n-3] + (s->mb_y * 8 * wrap) + s->mb_x * 8;
                 }
                 if(s->mb_x==0) a= (1024 + (scale>>1))/scale;
                 else           a= get_dc(dest-8, wrap, scale*8);
@@ -827,13 +854,13 @@ static void msmpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr
     int pred;
 
     if(s->msmpeg4_version==1){
-        INT32 *dc_val;
+        int32_t *dc_val;
         pred = msmpeg4v1_pred_dc(s, n, &dc_val);
         
         /* update predictor */
         *dc_val= level;
     }else{
-        UINT16 *dc_val;
+        uint16_t *dc_val;
         pred = msmpeg4_pred_dc(s, n, &dc_val, dir_ptr);
 
         /* update predictor */
@@ -899,7 +926,7 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int
     int last_non_zero, sign, slevel;
     int code, run_diff, dc_pred_dir;
     const RLTable *rl;
-    const UINT8 *scantable;
+    const uint8_t *scantable;
 
     if (s->mb_intra) {
         set_stat(ST_DC);
@@ -911,7 +938,7 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int
             rl = &rl_table[3 + s->rl_chroma_table_index];
         }
         run_diff = 0;
-        scantable= s->intra_scantable;
+        scantable= s->intra_scantable.permutated;
         set_stat(ST_INTRA_AC);
     } else {
         i = 0;
@@ -920,15 +947,16 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int
             run_diff = 0;
         else
             run_diff = 1;
-        scantable= s->inter_scantable;
+        scantable= s->inter_scantable.permutated;
         set_stat(ST_INTER_AC);
     }
 
     /* recalculate block_last_index for M$ wmv1 */
-    if(scantable!=zigzag_direct && s->block_last_index[n]>0){
+    if(s->msmpeg4_version>=4 && s->block_last_index[n]>0){
         for(last_index=63; last_index>=0; last_index--){
             if(block[scantable[last_index]]) break;
         }
+        s->block_last_index[n]= last_index;
     }else
         last_index = s->block_last_index[n];
     /* AC coefs */
@@ -945,6 +973,7 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int
                sign = 1;
                level = -level;
            }
+
             if(level<=MAX_LEVEL && run<=MAX_RUN){
                 s->ac_stats[s->mb_intra][n>3][level][run][last]++;
             }
@@ -975,7 +1004,7 @@ else
                         /* third escape */
                         put_bits(&s->pb, 1, 0);
                         put_bits(&s->pb, 1, last);
-                        if(s->msmpeg4_version==4){
+                        if(s->msmpeg4_version>=4){
                             if(s->esc3_level_length==0){
                                 s->esc3_level_length=8;
                                 s->esc3_run_length= 6;
@@ -1014,7 +1043,7 @@ else
 /****************************************/
 /* decoding stuff */
 
-static VLC mb_non_intra_vlc;
+static VLC mb_non_intra_vlc[4];
 static VLC mb_intra_vlc;
 static VLC dc_lum_vlc[2];
 static VLC dc_chroma_vlc[2];
@@ -1139,9 +1168,12 @@ int ff_msmpeg4_decode_init(MpegEncContext *s)
                  &mvtab[0][1], 2, 1,
                  &mvtab[0][0], 2, 1);
 
-        init_vlc(&mb_non_intra_vlc, MB_NON_INTRA_VLC_BITS, 128, 
-                 &table_mb_non_intra[0][1], 8, 4,
-                 &table_mb_non_intra[0][0], 8, 4);
+        for(i=0; i<4; i++){
+            init_vlc(&mb_non_intra_vlc[i], MB_NON_INTRA_VLC_BITS, 128, 
+                     &wmv2_inter_table[i][0][1], 8, 4,
+                     &wmv2_inter_table[i][0][0], 8, 4); //FIXME name?
+        }
+        
         init_vlc(&mb_intra_vlc, MB_INTRA_VLC_BITS, 64, 
                  &table_mb_intra[0][1], 4, 2,
                  &table_mb_intra[0][0], 4, 2);
@@ -1157,6 +1189,23 @@ int ff_msmpeg4_decode_init(MpegEncContext *s)
                  &table_inter_intra[0][1], 2, 1,
                  &table_inter_intra[0][0], 2, 1);
     }
+    
+    switch(s->msmpeg4_version){
+    case 1:
+    case 2:
+        s->decode_mb= msmpeg4v12_decode_mb;
+        break;
+    case 3:
+    case 4:
+        s->decode_mb= msmpeg4v34_decode_mb;
+        break;
+    case 5:
+        s->decode_mb= wmv2_decode_mb;
+        break;
+    }
+    
+    s->slice_height= s->mb_height; //to avoid 1/0 if the first frame isn't a keyframe
+    
     return 0;
 }
 
@@ -1177,7 +1226,7 @@ int msmpeg4_decode_picture_header(MpegEncContext * s)
 #if 0
 {
 int i;
-for(i=0; i<s->gb.size*8; i++)
+for(i=0; i<s->gb.size_in_bits; i++)
     printf("%d", get_bits1(&s->gb));
 //    get_bits1(&s->gb);
 printf("END\n");
@@ -1210,6 +1259,10 @@ return -1;
 }
 #endif
     s->qscale = get_bits(&s->gb, 5);
+    if(s->qscale==0){
+        fprintf(stderr, "invalid qscale\n");
+        return -1;
+    }
 
     if (s->pict_type == I_TYPE) {
         code = get_bits(&s->gb, 5); 
@@ -1247,8 +1300,8 @@ return -1;
         case 4:
             msmpeg4_decode_ext_header(s, (2+5+5+17+7)/8);
 
-            if(s->bit_rate > 50) s->per_mb_rl_table= get_bits1(&s->gb);
-            else                 s->per_mb_rl_table= 0;
+            if(s->bit_rate > MBAC_BITRATE) s->per_mb_rl_table= get_bits1(&s->gb);
+            else                           s->per_mb_rl_table= 0;
             
             if(!s->per_mb_rl_table){
                 s->rl_chroma_table_index = decode012(&s->gb);
@@ -1292,8 +1345,8 @@ return -1;
         case 4:
             s->use_skip_mb_code = get_bits1(&s->gb);
 
-            if(s->bit_rate > 50) s->per_mb_rl_table= get_bits1(&s->gb);
-            else                 s->per_mb_rl_table= 0;
+            if(s->bit_rate > MBAC_BITRATE) s->per_mb_rl_table= get_bits1(&s->gb);
+            else                           s->per_mb_rl_table= 0;
 
             if(!s->per_mb_rl_table){
                 s->rl_table_index = decode012(&s->gb);
@@ -1303,7 +1356,7 @@ return -1;
             s->dc_table_index = get_bits1(&s->gb);
 
             s->mv_table_index = get_bits1(&s->gb);
-            s->inter_intra_pred= (s->width*s->height < 320*240 && s->bit_rate<=128);
+            s->inter_intra_pred= (s->width*s->height < 320*240 && s->bit_rate<=II_BITRATE);
             break;
         }
 /*     printf("skip:%d rl:%d rlc:%d dc:%d mv:%d mbrl:%d qp:%d   \n", 
@@ -1320,6 +1373,7 @@ return -1;
            s->no_rounding = 0;
        }
     }
+//printf("%d %d %d %d %d\n", s->pict_type, s->bit_rate, s->inter_intra_pred, s->width, s->height);
 
     s->esc3_level_length= 0;
     s->esc3_run_length= 0;
@@ -1340,13 +1394,13 @@ int msmpeg4_decode_ext_header(MpegEncContext * s, int buf_size)
         int fps;
 
         fps= get_bits(&s->gb, 5);
-        s->bit_rate= get_bits(&s->gb, 11);
+        s->bit_rate= get_bits(&s->gb, 11)*1024;
         if(s->msmpeg4_version>=3)
             s->flipflop_rounding= get_bits1(&s->gb);
         else
             s->flipflop_rounding= 0;
 
-//        printf("fps:%2d bps:%2d roundingType:%1d\n", fps, s->bit_rate, s->flipflop_rounding);
+//        printf("fps:%2d bps:%2d roundingType:%1d\n", fps, s->bit_rate/1024, s->flipflop_rounding);
     }
     else if(left<length+8)
     {
@@ -1415,10 +1469,12 @@ static int msmpeg4v2_decode_motion(MpegEncContext * s, int pred, int f_code)
         return pred;
     sign = get_bits1(&s->gb);
     shift = f_code - 1;
-    val = (code - 1) << shift;
-    if (shift > 0)
+    val = code;
+    if (shift) {
+        val = (val - 1) << shift;
         val |= get_bits(&s->gb, shift);
-    val++;
+        val++;
+    }
     if (sign)
         val = -val;
 
@@ -1431,11 +1487,10 @@ static int msmpeg4v2_decode_motion(MpegEncContext * s, int pred, int f_code)
     return val;
 }
 
-
-static int msmpeg4v12_decode_mb(MpegEncContext *s, 
-                      DCTELEM block[6][64])
+static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
 {
     int cbp, code, i;
+    
     if (s->pict_type == P_TYPE) {
         if (s->use_skip_mb_code) {
             if (get_bits1(&s->gb)) {
@@ -1508,7 +1563,7 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s,
     }
 
     for (i = 0; i < 6; i++) {
-        if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0)
+        if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
        {
              fprintf(stderr,"\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
              return -1;
@@ -1517,23 +1572,12 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s,
     return 0;
 }
 
-int msmpeg4_decode_mb(MpegEncContext *s, 
-                      DCTELEM block[6][64])
+static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
 {
     int cbp, code, i;
-    UINT8 *coded_val;
-
-#ifdef PRINT_MB
-if(s->mb_x==0){
-    printf("\n");
-    if(s->mb_y==0) printf("\n");
-}
-#endif
-    /* special slice handling */
-    handle_slices(s);
+    uint8_t *coded_val;
+    uint32_t * const mb_type_ptr= &s->current_picture.mb_type[ s->mb_x + s->mb_y*s->mb_stride ];
 
-    if(s->msmpeg4_version<=2) return msmpeg4v12_decode_mb(s, block); //FIXME export function & call from outside perhaps
-    
     if (s->pict_type == P_TYPE) {
         set_stat(ST_INTER_MB);
         if (s->use_skip_mb_code) {
@@ -1547,14 +1591,13 @@ if(s->mb_x==0){
                 s->mv[0][0][0] = 0;
                 s->mv[0][0][1] = 0;
                 s->mb_skiped = 1;
-#ifdef PRINT_MB
-printf("S ");
-#endif
+                *mb_type_ptr = MB_TYPE_SKIP | MB_TYPE_L0 | MB_TYPE_16x16;
+
                 return 0;
             }
         }
         
-        code = get_vlc2(&s->gb, mb_non_intra_vlc.table, MB_NON_INTRA_VLC_BITS, 3);
+        code = get_vlc2(&s->gb, mb_non_intra_vlc[DEFAULT_INTER_INDEX].table, MB_NON_INTRA_VLC_BITS, 3);
         if (code < 0)
             return -1;
        //s->mb_intra = (code & 0x40) ? 0 : 1;
@@ -1595,16 +1638,12 @@ printf("S ");
         s->mv_type = MV_TYPE_16X16;
         s->mv[0][0][0] = mx;
         s->mv[0][0][1] = my;
-#ifdef PRINT_MB
-printf("P ");
-#endif
+        *mb_type_ptr = MB_TYPE_L0 | MB_TYPE_16x16;
     } else {
 //printf("I at %d %d %d %06X\n", s->mb_x, s->mb_y, ((cbp&3)? 1 : 0) +((cbp&0x3C)? 2 : 0), show_bits(&s->gb, 24));
         set_stat(ST_INTRA_MB);
         s->ac_pred = get_bits1(&s->gb);
-#ifdef PRINT_MB
-printf("%c", s->ac_pred ? 'A' : 'I');
-#endif
+        *mb_type_ptr = MB_TYPE_INTRA;
         if(s->inter_intra_pred){
             s->h263_aic_dir= get_vlc2(&s->gb, inter_intra_vlc.table, INTER_INTRA_VLC_BITS, 1);
 //            printf("%d%d %d %d/", s->ac_pred, s->h263_aic_dir, s->mb_x, s->mb_y);
@@ -1616,7 +1655,7 @@ printf("%c", s->ac_pred ? 'A' : 'I');
     }
 
     for (i = 0; i < 6; i++) {
-        if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0)
+        if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
        {
            fprintf(stderr,"\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
            return -1;
@@ -1625,14 +1664,14 @@ printf("%c", s->ac_pred ? 'A' : 'I');
     
     return 0;
 }
-
+//#define ERROR_DETAILS
 static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
-                              int n, int coded)
+                              int n, int coded, const uint8_t *scan_table)
 {
-    int code, level, i, j, last, run, run_diff;
+    int level, i, last, run, run_diff;
     int dc_pred_dir;
     RLTable *rl;
-    const UINT8 *scan_table;
+    RL_VLC_ELEM *rl_vlc;
     int qmul, qadd;
 
     if (s->mb_intra) {
@@ -1642,14 +1681,7 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
        /* DC coef */
         set_stat(ST_DC);
         level = msmpeg4_decode_dc(s, n, &dc_pred_dir);
-#ifdef PRINT_MB
-{
-    static int c;
-    if(n==0) c=0;
-    if(n==4) printf("%X", c);
-    c+= c +dc_pred_dir;
-}
-#endif
+        
         if (level < 0){
             fprintf(stderr, "dc overflow- block: %d qscale: %d//\n", n, s->qscale);
             if(s->inter_intra_pred) level=0;
@@ -1671,23 +1703,24 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
         block[0] = level;
 
         run_diff = 0;
-       i = 1;
+        i = 0;
         if (!coded) {
             goto not_coded;
         }
         if (s->ac_pred) {
             if (dc_pred_dir == 0) 
-                scan_table = s->intra_v_scantable; /* left */
+                scan_table = s->intra_v_scantable.permutated; /* left */
             else
-                scan_table = s->intra_h_scantable; /* top */
+                scan_table = s->intra_h_scantable.permutated; /* top */
         } else {
-            scan_table = s->intra_scantable;
+            scan_table = s->intra_scantable.permutated;
         }
         set_stat(ST_INTRA_AC);
+        rl_vlc= rl->rl_vlc[0];
     } else {
         qmul = s->qscale << 1;
         qadd = (s->qscale - 1) | 1;
-       i = 0;
+        i = -1;
         rl = &rl_table[3 + s->rl_table_index];
 
         if(s->msmpeg4_version==2)
@@ -1696,53 +1729,68 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
             run_diff = 1;
 
         if (!coded) {
-            s->block_last_index[n] = i - 1;
+            s->block_last_index[n] = i;
             return 0;
         }
-        scan_table = s->inter_scantable;
+        if(!scan_table)
+            scan_table = s->inter_scantable.permutated;
         set_stat(ST_INTER_AC);
+        rl_vlc= rl->rl_vlc[s->qscale];
     }
-
+  {
+    OPEN_READER(re, &s->gb);
     for(;;) {
-        code = get_vlc2(&s->gb, rl->vlc.table, TEX_VLC_BITS, 2);
-        if (code < 0){
-            fprintf(stderr, "illegal AC-VLC code at %d %d\n", s->mb_x, s->mb_y);
-            return -1;
-        }
-        if (code == rl->n) {
+        UPDATE_CACHE(re, &s->gb);
+        GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2);
+        if (level==0) {
+            int cache;
+            cache= GET_CACHE(re, &s->gb);
             /* escape */
-            if (s->msmpeg4_version==1 || get_bits1(&s->gb) == 0) {
-                if (s->msmpeg4_version==1 || get_bits1(&s->gb) == 0) {
+            if (s->msmpeg4_version==1 || (cache&0x80000000)==0) {
+                if (s->msmpeg4_version==1 || (cache&0x40000000)==0) {
                     /* third escape */
+                    if(s->msmpeg4_version!=1) LAST_SKIP_BITS(re, &s->gb, 2);
+                    UPDATE_CACHE(re, &s->gb);
                     if(s->msmpeg4_version<=3){
-                        last= get_bits1(&s->gb);
-                        run= get_bits(&s->gb, 6);  
-                        level= get_bits(&s->gb, 8);
-                        level= ((int8_t)level);
-                    }else{
+                        last=  SHOW_UBITS(re, &s->gb, 1); SKIP_CACHE(re, &s->gb, 1);
+                        run=   SHOW_UBITS(re, &s->gb, 6); SKIP_CACHE(re, &s->gb, 6);
+                        level= SHOW_SBITS(re, &s->gb, 8); LAST_SKIP_CACHE(re, &s->gb, 8);
+                        SKIP_COUNTER(re, &s->gb, 1+6+8);
+                    }else{                        
                         int sign;
-                        last= get_bits1(&s->gb);
+                        last=  SHOW_UBITS(re, &s->gb, 1); SKIP_BITS(re, &s->gb, 1);
                         if(!s->esc3_level_length){
                             int ll;
                             //printf("ESC-3 %X at %d %d\n", show_bits(&s->gb, 24), s->mb_x, s->mb_y);
                             if(s->qscale<8){
-                                ll= get_bits(&s->gb, 3);
+                                ll= SHOW_UBITS(re, &s->gb, 3); SKIP_BITS(re, &s->gb, 3);
                                 if(ll==0){
-                                    if(get_bits1(&s->gb)) printf("cool a new vlc code ,contact the ffmpeg developers and upload the file\n");
+                                    if(SHOW_UBITS(re, &s->gb, 1)) printf("cool a new vlc code ,contact the ffmpeg developers and upload the file\n");
+                                    SKIP_BITS(re, &s->gb, 1);
                                     ll=8;
                                 }
                             }else{
                                 ll=2;
-                                while(ll<8 && get_bits1(&s->gb)==0) ll++;
+                                while(ll<8 && SHOW_UBITS(re, &s->gb, 1)==0){
+                                    ll++;
+                                    SKIP_BITS(re, &s->gb, 1);
+                                }
+                                if(ll<8) SKIP_BITS(re, &s->gb, 1);
                             }
 
                             s->esc3_level_length= ll;
-                            s->esc3_run_length= get_bits(&s->gb, 2) + 3;
+                            s->esc3_run_length= SHOW_UBITS(re, &s->gb, 2) + 3; SKIP_BITS(re, &s->gb, 2);
 //printf("level length:%d, run length: %d\n", ll, s->esc3_run_length);
+                            UPDATE_CACHE(re, &s->gb);
                         }
-                        run= get_bits(&s->gb, s->esc3_run_length);  
-                        sign= get_bits1(&s->gb);
-                        level= get_bits(&s->gb, s->esc3_level_length);
+                        run=   SHOW_UBITS(re, &s->gb, s->esc3_run_length); 
+                        SKIP_BITS(re, &s->gb, s->esc3_run_length);
+                        
+                        sign=  SHOW_UBITS(re, &s->gb, 1); 
+                        SKIP_BITS(re, &s->gb, 1);
+                        
+                        level= SHOW_UBITS(re, &s->gb, s->esc3_level_length); 
+                        SKIP_BITS(re, &s->gb, s->esc3_level_length);
                         if(sign) level= -level;
                     }
 //printf("level: %d, run: %d at %d %d\n", level, run, s->mb_x, s->mb_y);
@@ -1774,65 +1822,95 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
                         fprintf(stderr, "|level| overflow in 3. esc\n");
                         return DECODING_AC_LOST;
                     }
+#endif
+                    i+= run + 1;
+                    if(last) i+=192;
+#ifdef ERROR_DETAILS
+                if(run==66)
+                    fprintf(stderr, "illegal vlc code in ESC3 level=%d\n", level);
+                else if((i>62 && i<192) || i>192+63)
+                    fprintf(stderr, "run overflow in ESC3 i=%d run=%d level=%d\n", i, run, level);
 #endif
                 } else {
                     /* second escape */
-                    code = get_vlc2(&s->gb, rl->vlc.table, TEX_VLC_BITS, 2);
-                    if (code < 0 || code >= rl->n){
-                        fprintf(stderr, "illegal ESC2-VLC code %d at %d %d\n", code, s->mb_x, s->mb_y);
-                        return -1;
-                    }
-                    run = rl->table_run[code];
-                    level = rl->table_level[code];
-                    last = code >= rl->last;
-                    run += rl->max_run[last][level] + run_diff;
-                    level= level * qmul + qadd;
-                    if (get_bits1(&s->gb))
-                        level = -level;
+#if MIN_CACHE_BITS < 23
+                    LAST_SKIP_BITS(re, &s->gb, 2);
+                    UPDATE_CACHE(re, &s->gb);
+#else
+                    SKIP_BITS(re, &s->gb, 2);
+#endif
+                    GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2);
+                    i+= run + rl->max_run[run>>7][level/qmul] + run_diff; //FIXME opt indexing
+                    level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
+                    LAST_SKIP_BITS(re, &s->gb, 1);
+#ifdef ERROR_DETAILS
+                if(run==66)
+                    fprintf(stderr, "illegal vlc code in ESC2 level=%d\n", level);
+                else if((i>62 && i<192) || i>192+63)
+                    fprintf(stderr, "run overflow in ESC2 i=%d run=%d level=%d\n", i, run, level);
+#endif
                 }
             } else {
                 /* first escape */
-                code = get_vlc2(&s->gb, rl->vlc.table, TEX_VLC_BITS, 2);
-                if (code < 0 || code >= rl->n){
-                    fprintf(stderr, "illegal ESC2-VLC code %d at %d %d\n", code, s->mb_x, s->mb_y);
-                    return -1;
-                }
-                run = rl->table_run[code];
-                level = rl->table_level[code];
-                last = code >= rl->last;
-                level += rl->max_level[last][run];
-                level= level * qmul + qadd;
-                if (get_bits1(&s->gb))
-                    level = -level;
+#if MIN_CACHE_BITS < 22
+                LAST_SKIP_BITS(re, &s->gb, 1);
+                UPDATE_CACHE(re, &s->gb);
+#else
+                SKIP_BITS(re, &s->gb, 1);
+#endif
+                GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2);
+                i+= run;
+                level = level + rl->max_level[run>>7][(run-1)&63] * qmul;//FIXME opt indexing
+                level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
+                LAST_SKIP_BITS(re, &s->gb, 1);
+#ifdef ERROR_DETAILS
+                if(run==66)
+                    fprintf(stderr, "illegal vlc code in ESC1 level=%d\n", level);
+                else if((i>62 && i<192) || i>192+63)
+                    fprintf(stderr, "run overflow in ESC1 i=%d run=%d level=%d\n", i, run, level);
+#endif
             }
         } else {
-            run = rl->table_run[code];
-            level = rl->table_level[code] * qmul + qadd;
-            last = code >= rl->last;
-            if (get_bits1(&s->gb))
-                level = -level;
-        }
-        i += run;
-        if (i >= 64){
-            fprintf(stderr, "run too long at %d %d\n", s->mb_x, s->mb_y);
-            return -1;
+            i+= run;
+            level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
+            LAST_SKIP_BITS(re, &s->gb, 1);
+#ifdef ERROR_DETAILS
+                if(run==66)
+                    fprintf(stderr, "illegal vlc code level=%d\n", level);
+                else if((i>62 && i<192) || i>192+63)
+                    fprintf(stderr, "run overflow i=%d run=%d level=%d\n", i, run, level);
+#endif
         }
+        if (i > 62){
+            i-= 192;
+            if(i&(~63)){
+                const int left= s->gb.size_in_bits - get_bits_count(&s->gb);
+                if(((i+192 == 64 && level/qmul==-1) || s->error_resilience<=1) && left>=0){
+                    fprintf(stderr, "ignoring overflow at %d %d\n", s->mb_x, s->mb_y);
+                    break;
+                }else{
+                    fprintf(stderr, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
+                    return -1;
+                }
+            }
 
-       j = scan_table[i];
-        block[j] = level;
-        i++;
-        if (last)
+            block[scan_table[i]] = level;
             break;
+        }
+
+        block[scan_table[i]] = level;
     }
+    CLOSE_READER(re, &s->gb);
+  }
  not_coded:
     if (s->mb_intra) {
         mpeg4_pred_ac(s, block, n, dc_pred_dir);
         if (s->ac_pred) {
-            i = 64; /* XXX: not optimal */
+            i = 63; /* XXX: not optimal */
         }
     }
-    if(s->msmpeg4_version==4 && i>1) i=64; //FIXME/XXX optimize
-    s->block_last_index[n] = i - 1;
+    if(s->msmpeg4_version>=4 && i>0) i=63; //FIXME/XXX optimize
+    s->block_last_index[n] = i;
     
     return 0;
 }
@@ -1872,14 +1950,14 @@ static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr)
     }
 
     if(s->msmpeg4_version==1){
-        INT32 *dc_val;
+        int32_t *dc_val;
         pred = msmpeg4v1_pred_dc(s, n, &dc_val);
         level += pred;
         
         /* update predictor */
         *dc_val= level;
     }else{
-        UINT16 *dc_val;
+        uint16_t *dc_val;
         pred = msmpeg4_pred_dc(s, n, &dc_val, dir_ptr);
         level += pred;
 
@@ -1932,3 +2010,9 @@ static int msmpeg4_decode_motion(MpegEncContext * s,
     *my_ptr = my;
     return 0;
 }
+
+/* The cleanest way to support it:
+ * there is too much shared between versions, so we can't have one file per version plus one common file,
+ * as almost everything would end up in the common file.
+ */
+#include "wmv2.c"