closed gop support & flags2 as all bits in flags are used

[ffmpeg] / libavcodec / h264.c
diff --git a/libavcodec/h264.c b/libavcodec/h264.c

index 959a0e1c1510434d1b362a99b8f2e101d56cbcc0..43ed13e99f778a958554516a8110ed03d9c6bd39 100644 (file)
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -59,9 +59,6 @@ typedef struct SPS{
      
      int profile_idc;
      int level_idc;
-    int multiple_slice_groups;         ///< more_than_one_slice_group_allowed_flag
-    int arbitrary_slice_order;         ///< arbitrary_slice_order_allowed_flag
-    int redundant_slices;              ///< redundant_slices_allowed_flag
      int log2_max_frame_num;            ///< log2_max_frame_num_minus4 + 4
      int poc_type;                      ///< pic_order_cnt_type
      int log2_max_poc_lsb;              ///< log2_max_pic_order_cnt_lsb_minus4
@@ -70,15 +67,19 @@ typedef struct SPS{
      int offset_for_top_to_bottom_field;
      int poc_cycle_length;              ///< num_ref_frames_in_pic_order_cnt_cycle
      int ref_frame_count;               ///< num_ref_frames
-    int required_frame_num_update_behaviour_flag;
+    int gaps_in_frame_num_allowed_flag;
      int mb_width;                      ///< frame_width_in_mbs_minus1 + 1
      int mb_height;                     ///< frame_height_in_mbs_minus1 + 1
      int frame_mbs_only_flag;
      int mb_aff;                        ///<mb_adaptive_frame_field_flag
      int direct_8x8_inference_flag;
+    int crop;                   ///< frame_cropping_flag
+    int crop_left;              ///< frame_cropping_rect_left_offset
+    int crop_right;             ///< frame_cropping_rect_right_offset
+    int crop_top;               ///< frame_cropping_rect_top_offset
+    int crop_bottom;            ///< frame_cropping_rect_bottom_offset
      int vui_parameters_present_flag;
-    int sar_width;
-    int sar_height;
+    AVRational sar;
      short offset_for_ref_frame[256]; //FIXME dyn aloc?
  }SPS;
  
@@ -100,11 +101,6 @@ typedef struct PPS{
      int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
      int constrained_intra_pred; ///< constrained_intra_pred_flag
      int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
-    int crop;                   ///< frame_cropping_flag
-    int crop_left;              ///< frame_cropping_rect_left_offset
-    int crop_right;             ///< frame_cropping_rect_right_offset
-    int crop_top;               ///< frame_cropping_rect_top_offset
-    int crop_bottom;            ///< frame_cropping_rect_bottom_offset
  }PPS;
  
  /**
@@ -149,8 +145,6 @@ typedef struct H264Context{
      uint8_t *rbsp_buffer;
      int rbsp_buffer_size;
  
-    int mb_stride; ///< stride of some mb tables
-
      int chroma_qp; //QPc
  
      int prev_mb_skiped; //FIXME remove (IMHO not used)
@@ -197,6 +191,12 @@ typedef struct H264Context{
      int b_stride;
      int b8_stride;
  
+    int halfpel_flag;
+    int thirdpel_flag;
+
+    int unknown_svq3_flag;
+    int next_slice_index;
+
      SPS sps_buffer[MAX_SPS_COUNT];
      SPS sps; ///< current sps
      
@@ -293,13 +293,25 @@ static VLC chroma_dc_total_zeros_vlc[3];
  static VLC run_vlc[6];
  static VLC run7_vlc;
  
+static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
+static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
+
+static inline uint32_t pack16to32(int a, int b){ // pack the low 16 bits of a and b into one 32 bit word; the halves swap under WORDS_BIGENDIAN, presumably so a is always the first 16 bit half in memory
+#ifdef WORDS_BIGENDIAN
+   return (b&0xFFFF) + ((uint32_t)a<<16); // shift as unsigned: left-shifting a negative int is undefined behaviour
+#else
+   return (a&0xFFFF) + ((uint32_t)b<<16); // shift as unsigned: left-shifting a negative int is undefined behaviour
+#endif
+}
+
  /**
   * fill a rectangle.
   * @param h height of the recatangle, should be a constant
   * @param w width of the recatangle, should be a constant
   * @param size the size of val (1 or 4), should be a constant
   */
-static inline void fill_rectangle(void *p, int w, int h, int stride, uint32_t val, int size){ //FIXME ensure this IS inlined
+static inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){ //FIXME ensure this IS inlined
+    uint8_t *p= (uint8_t*)vp;
      assert(size==1 || size==4);
      
      w      *= size;
@@ -314,6 +326,8 @@ static inline void fill_rectangle(void *p, int w, int h, int stride, uint32_t va
          *(uint16_t*)(p + 1*stride)=
          *(uint16_t*)(p + 2*stride)=
          *(uint16_t*)(p + 3*stride)= size==4 ? val : val*0x0101;
+    }else if(w==4 && h==1){
+        *(uint32_t*)(p + 0*stride)= size==4 ? val : val*0x01010101;
      }else if(w==4 && h==2){
          *(uint32_t*)(p + 0*stride)=
          *(uint32_t*)(p + 1*stride)= size==4 ? val : val*0x01010101;
@@ -355,7 +369,7 @@ static inline void fill_rectangle(void *p, int w, int h, int stride, uint32_t va
  
  static inline void fill_caches(H264Context *h, int mb_type){
      MpegEncContext * const s = &h->s;
-    const int mb_xy= s->mb_x + s->mb_y*h->mb_stride;
+    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
      int topleft_xy, top_xy, topright_xy, left_xy[2];
      int topleft_type, top_type, topright_type, left_type[2];
      int left_block[4];
@@ -365,10 +379,13 @@ static inline void fill_caches(H264Context *h, int mb_type){
      
      if(h->sps.mb_aff){
      //FIXME
+        topleft_xy = 0; /* avoid warning */
+        top_xy = 0; /* avoid warning */
+        topright_xy = 0; /* avoid warning */
      }else{
-        topleft_xy = mb_xy-1 - h->mb_stride;
-        top_xy     = mb_xy   - h->mb_stride;
-        topright_xy= mb_xy+1 - h->mb_stride;
+        topleft_xy = mb_xy-1 - s->mb_stride;
+        top_xy     = mb_xy   - s->mb_stride;
+        topright_xy= mb_xy+1 - s->mb_stride;
          left_xy[0]   = mb_xy-1;
          left_xy[1]   = mb_xy-1;
          left_block[0]= 0;
@@ -602,7 +619,7 @@ static inline void fill_caches(H264Context *h, int mb_type){
  
  static inline void write_back_intra_pred_mode(H264Context *h){
      MpegEncContext * const s = &h->s;
-    const int mb_xy= s->mb_x + s->mb_y*h->mb_stride;
+    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
  
      h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
      h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
@@ -626,7 +643,7 @@ static inline int check_intra4x4_pred_mode(H264Context *h){
          for(i=0; i<4; i++){
              int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
              if(status<0){
-                fprintf(stderr, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
+                av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
                  return -1;
              } else if(status){
                  h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
@@ -638,7 +655,7 @@ static inline int check_intra4x4_pred_mode(H264Context *h){
          for(i=0; i<4; i++){
              int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
              if(status<0){
-                fprintf(stderr, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
+                av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
                  return -1;
              } else if(status){
                  h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
@@ -660,7 +677,7 @@ static inline int check_intra_pred_mode(H264Context *h, int mode){
      if(!(h->top_samples_available&0x8000)){
          mode= top[ mode ];
          if(mode<0){
-            fprintf(stderr, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
+            av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
              return -1;
          }
      }
@@ -668,7 +685,7 @@ static inline int check_intra_pred_mode(H264Context *h, int mode){
      if(!(h->left_samples_available&0x8000)){
          mode= left[ mode ];
          if(mode<0){
-            fprintf(stderr, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
+            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
              return -1;
          } 
      }
@@ -693,7 +710,7 @@ static inline int pred_intra_mode(H264Context *h, int n){
  
  static inline void write_back_non_zero_count(H264Context *h){
      MpegEncContext * const s = &h->s;
-    const int mb_xy= s->mb_x + s->mb_y*h->mb_stride;
+    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
  
      h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[4+8*4];
      h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[5+8*4];
@@ -751,7 +768,6 @@ static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, in
   * @param my the y component of the predicted motion vector
   */
  static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
-    MpegEncContext * const s = &h->s;
      const int index8= scan8[n];
      const int top_ref=      h->ref_cache[list][ index8 - 8 ];
      const int left_ref=     h->ref_cache[list][ index8 - 1 ];
@@ -772,7 +788,6 @@ static inline void pred_motion(H264Context * const h, int n, int part_width, int
  
      diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
      match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
-    
      if(match_count > 1){ //most common
          *mx= mid_pred(A[0], B[0], C[0]);
          *my= mid_pred(A[1], B[1], C[1]);
@@ -797,7 +812,7 @@ static inline void pred_motion(H264Context * const h, int n, int part_width, int
          }
      }
          
-    tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1],                    diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, s->mb_x, s->mb_y, n, list);
+    tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1],                    diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
  }
  
  /**
@@ -807,12 +822,11 @@ static inline void pred_motion(H264Context * const h, int n, int part_width, int
   * @param my the y component of the predicted motion vector
   */
  static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
-    MpegEncContext * const s = &h->s;
      if(n==0){
          const int top_ref=      h->ref_cache[list][ scan8[0] - 8 ];
          const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
  
-        tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d", top_ref, B[0], B[1], s->mb_x, s->mb_y, n, list);
+        tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
          
          if(top_ref == ref){
              *mx= B[0];
@@ -823,7 +837,7 @@ static inline void pred_16x8_motion(H264Context * const h, int n, int list, int
          const int left_ref=     h->ref_cache[list][ scan8[8] - 1 ];
          const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
          
-        tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d", left_ref, A[0], A[1], s->mb_x, s->mb_y, n, list);
+        tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
  
          if(left_ref == ref){
              *mx= A[0];
@@ -843,12 +857,11 @@ static inline void pred_16x8_motion(H264Context * const h, int n, int list, int
   * @param my the y component of the predicted motion vector
   */
  static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
-    MpegEncContext * const s = &h->s;
      if(n==0){
          const int left_ref=      h->ref_cache[list][ scan8[0] - 1 ];
          const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];
          
-        tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d", left_ref, A[0], A[1], s->mb_x, s->mb_y, n, list);
+        tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
  
          if(left_ref == ref){
              *mx= A[0];
@@ -861,7 +874,7 @@ static inline void pred_8x16_motion(H264Context * const h, int n, int list, int
  
          diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
          
-        tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d", diagonal_ref, C[0], C[1], s->mb_x, s->mb_y, n, list);
+        tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
  
          if(diagonal_ref == ref){ 
              *mx= C[0];
@@ -875,11 +888,10 @@ static inline void pred_8x16_motion(H264Context * const h, int n, int list, int
  }
  
  static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
-    MpegEncContext * const s = &h->s;
      const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
      const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
  
-    tprintf("pred_pskip: (%d) (%d) at %2d %2d", top_ref, left_ref, s->mb_x, s->mb_y);
+    tprintf("pred_pskip: (%d) (%d) at %2d %2d", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
  
      if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
         || (top_ref == 0  && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
@@ -896,7 +908,6 @@ static inline void pred_pskip_motion(H264Context * const h, int * const mx, int
  
  static inline void write_back_motion(H264Context *h, int mb_type){
      MpegEncContext * const s = &h->s;
-    const int mb_xy= s->mb_x + s->mb_y*h->mb_stride;
      const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
      const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
      int list;
@@ -1474,7 +1485,7 @@ static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
      src[2+0*stride]=
      src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
      src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
-};
+}
  
  static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
      LOAD_TOP_EDGE    
@@ -1497,7 +1508,7 @@ static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
      src[3+2*stride]=
      src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
      src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
-};
+}
  
  static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
      const int lt= src[-1-1*stride];
@@ -1521,7 +1532,7 @@ static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride
      src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
      src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
      src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
-};
+}
  
  static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
      LOAD_TOP_EDGE    
@@ -1544,7 +1555,7 @@ static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride)
      src[3+1*stride]=
      src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
      src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
-};
+}
  
  static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
      LOAD_LEFT_EDGE    
@@ -1565,7 +1576,7 @@ static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride)
      src[2+2*stride]=
      src[2+3*stride]=
      src[3+3*stride]=l3;
-};
+}
      
  static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
      const int lt= src[-1-1*stride];
@@ -1589,7 +1600,7 @@ static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int strid
      src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
      src[0+3*stride]=(l2 + l3 + 1)>>1;
      src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
-};
+}
  
  static void pred16x16_vertical_c(uint8_t *src, int stride){
      int i;
@@ -1682,43 +1693,48 @@ static void pred16x16_128_dc_c(uint8_t *src, int stride){
      }
  }
  
-static void pred16x16_plane_c(uint8_t *src, int stride){
-    uint8_t *cm = cropTbl + MAX_NEG_CROP;
-    int i, dx, dy, dc;
-    int temp[16];
-    
-    dc= 16*(src[15-stride] + src[-1+15*stride]);
-    
-    dx=dy=0;
-    for(i=1; i<9; i++){
-        dx += i*(src[7+i-stride] - src[7-i-stride]);
-        dy += i*(src[-1+(7+i)*stride] - src[-1+(7-i)*stride]);
-    }
-    dx= (5*dx+32)>>6;
-    dy= (5*dy+32)>>6;
-    
-    dc += 16;
+static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){ // fills the 16x16 block at src with a plane (linear gradient) built from the row above and the column to its left; svq3!=0 selects the alternate gradient scaling below
+  int i, j, k;
+  int a;
+  uint8_t *cm = cropTbl + MAX_NEG_CROP; // lookup applied to every output sample; presumably clamps to 0..255 (table is defined elsewhere)
+  const uint8_t * const src0 = src+7-stride; // row above the block, column 7
+  const uint8_t *src1 = src+8*stride-1; // column left of the block, row 8
+  const uint8_t *src2 = src1-2*stride;      // == src+6*stride-1;
+  int H = src0[1] - src0[-1]; // k=1 term of the horizontal gradient
+  int V = src1[0] - src2[ 0]; // k=1 term of the vertical gradient
+  for(k=2; k<=8; ++k) { // add the k-weighted differences of the neighbours k steps either side of the centre
+    src1 += stride; src2 -= stride; // after the loop src1 is at row 15 and src2 at row -1 of the left column
+    H += k*(src0[k] - src0[-k]);
+    V += k*(src1[0] - src2[ 0]);
+  }
+  if(svq3){
+    H = ( 5*(H/4) ) / 16; // truncating divisions here, versus the rounded shift in the else branch
+    V = ( 5*(V/4) ) / 16;
+
+    /* required for 100% accuracy */
+    i = H; H = V; V = i; // swap the horizontal and vertical gradients (i is only a temporary)
+  }else{
+    H = ( 5*H+32 ) >> 6; // scale by 5/64 with rounding
+    V = ( 5*V+32 ) >> 6;
+  }
+
+  a = 16*(src1[0] + src2[16] + 1) - 7*(V+H); // row 0, column 0 value scaled by 32: src1[0] is the bottom-left neighbour, src2[16] the top-right one, the +16 rounds the >>5 below
+  for(j=16; j>0; --j) { // one iteration per output row
+    int b = a; // running value along the current row
+    a += V; // advance the row start value to the next row
+    for(i=-16; i<0; i+=4) { // four samples per iteration; 16+i runs over columns 0,4,8,12
+      src[16+i] = cm[ (b    ) >> 5 ];
+      src[17+i] = cm[ (b+  H) >> 5 ];
+      src[18+i] = cm[ (b+2*H) >> 5 ];
+      src[19+i] = cm[ (b+3*H) >> 5 ];
+      b += 4*H;
+    }
+    src += stride;
+  }
+}
  
-    //FIXME modifiy dc,dx,dy to avoid -7
-    
-    for(i=0; i<16; i++)
-        temp[i]= dx*(i-7) + dc;
-    
-    if(   (dc - ABS(dx)*8 - ABS(dy)*8)>>5 < 0
-       || (dc + ABS(dx)*8 + ABS(dy)*8)>>5 > 255){
-    
-        for(i=0; i<16; i++){
-            int j;
-            for(j=0; j<16; j++)
-                src[j + i*stride]= cm[ (temp[j] + dy*(i-7))>>5 ];
-        }
-    }else{
-        for(i=0; i<16; i++){
-            int j;
-            for(j=0; j<16; j++)
-                src[j + i*stride]= (temp[j] + dy*(i-7))>>5;
-        }
-    }
+static void pred16x16_plane_c(uint8_t *src, int stride){ // 16x16 plane prediction entry point with the svq3 variant disabled
+    pred16x16_plane_compat_c(src, stride, 0); // svq3=0: rounded-shift gradient scaling, no gradient swap
  }
  
  static void pred8x8_vertical_c(uint8_t *src, int stride){
@@ -1825,42 +1841,36 @@ static void pred8x8_dc_c(uint8_t *src, int stride){
  }
  
  static void pred8x8_plane_c(uint8_t *src, int stride){
-    uint8_t *cm = cropTbl + MAX_NEG_CROP;
-    int i, dx, dy, dc;
-    int temp[8];
-
-    dc= 16*(src[7-stride] + src[-1+7*stride]);
-    
-    dx=dy=0;
-    for(i=1; i<5; i++){
-        dx += i*(src[3+i-stride] - src[3-i-stride]);
-        dy += i*(src[-1+(3+i)*stride] - src[-1+(3-i)*stride]);
-    }
-    dx= (17*dx+16)>>5;
-    dy= (17*dy+16)>>5;
-    
-    dc += 16;
-    
-    //FIXME modifiy dc,dx,dy to avoid -3
-    
-    for(i=0; i<8; i++)
-        temp[i]= dx*(i-3) + dc;
-    
-    if(   (dc - ABS(dx)*4 - ABS(dy)*4)>>5 < 0
-       || (dc + ABS(dx)*4 + ABS(dy)*4)>>5 > 255){
-    
-        for(i=0; i<8; i++){
-            int j;
-            for(j=0; j<8; j++)
-                src[j + i*stride]= cm[ (temp[j] + dy*(i-3))>>5 ];
-        }
-    }else{
-        for(i=0; i<8; i++){
-            int j;
-            for(j=0; j<8; j++)
-                src[j + i*stride]= (temp[j] + dy*(i-3))>>5;
-        }
-    }
+  int j, k; // this function fills the 8x8 block at src with a plane (linear gradient) built from the row above and the column to its left
+  int a;
+  uint8_t *cm = cropTbl + MAX_NEG_CROP; // lookup applied to every output sample; presumably clamps to 0..255 (table is defined elsewhere)
+  const uint8_t * const src0 = src+3-stride; // row above the block, column 3
+  const uint8_t *src1 = src+4*stride-1; // column left of the block, row 4
+  const uint8_t *src2 = src1-2*stride;      // == src+2*stride-1;
+  int H = src0[1] - src0[-1]; // k=1 term of the horizontal gradient
+  int V = src1[0] - src2[ 0]; // k=1 term of the vertical gradient
+  for(k=2; k<=4; ++k) { // add the k-weighted differences of the neighbours k steps either side of the centre
+    src1 += stride; src2 -= stride; // after the loop src1 is at row 7 and src2 at row -1 of the left column
+    H += k*(src0[k] - src0[-k]);
+    V += k*(src1[0] - src2[ 0]);
+  }
+  H = ( 17*H+16 ) >> 5; // scale by 17/32 with rounding
+  V = ( 17*V+16 ) >> 5;
+
+  a = 16*(src1[0] + src2[8]+1) - 3*(V+H); // row 0, column 0 value scaled by 32: src1[0] is the bottom-left neighbour, src2[8] the top-right one, the +16 rounds the >>5 below
+  for(j=8; j>0; --j) { // one iteration per output row
+    int b = a; // value for column 0 of the current row
+    a += V; // advance the row start value to the next row
+    src[0] = cm[ (b    ) >> 5 ];
+    src[1] = cm[ (b+  H) >> 5 ];
+    src[2] = cm[ (b+2*H) >> 5 ];
+    src[3] = cm[ (b+3*H) >> 5 ];
+    src[4] = cm[ (b+4*H) >> 5 ];
+    src[5] = cm[ (b+5*H) >> 5 ];
+    src[6] = cm[ (b+6*H) >> 5 ];
+    src[7] = cm[ (b+7*H) >> 5 ];
+    src += stride;
+  }
  }
  
  static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
@@ -1889,7 +1899,7 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
         || full_my < 0-extra_height 
         || full_mx + 16/*FIXME*/ > s->width + extra_width 
         || full_my + 16/*FIXME*/ > s->height + extra_height){
-        ff_emulated_edge_mc(s, src_y - 2 - 2*s->linesize, s->linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, s->width, s->height);
+        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*s->linesize, s->linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, s->width, s->height);
              src_y= s->edge_emu_buffer + 2 + 2*s->linesize;
          emu=1;
      }
@@ -1902,13 +1912,13 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
      if(s->flags&CODEC_FLAG_GRAY) return;
      
      if(emu){
-        ff_emulated_edge_mc(s, src_cb, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), s->width>>1, s->height>>1);
+        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), s->width>>1, s->height>>1);
              src_cb= s->edge_emu_buffer;
      }
      chroma_op(dest_cb, src_cb, s->uvlinesize, chroma_height, mx&7, my&7);
  
      if(emu){
-        ff_emulated_edge_mc(s, src_cr, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), s->width>>1, s->height>>1);
+        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), s->width>>1, s->height>>1);
              src_cr= s->edge_emu_buffer;
      }
      chroma_op(dest_cr, src_cr, s->uvlinesize, chroma_height, mx&7, my&7);
@@ -1952,7 +1962,7 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t
                        qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                        qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg)){
      MpegEncContext * const s = &h->s;
-    const int mb_xy= s->mb_x + s->mb_y*h->mb_stride;
+    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
      const int mb_type= s->current_picture.mb_type[mb_xy];
      
      assert(IS_INTER(mb_type));
@@ -2094,19 +2104,7 @@ static void init_pred_ptrs(H264Context *h){
      h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
  }
  
-//FIXME factorize
-#define CHECKED_ALLOCZ(p, size)\
-{\
-    p= av_mallocz(size);\
-    if(p==NULL){\
-        perror("malloc");\
-        goto fail;\
-    }\
-}
-
  static void free_tables(H264Context *h){
-    MpegEncContext * const s = &h->s;
-
      av_freep(&h->intra4x4_pred_mode);
      av_freep(&h->non_zero_count);
      av_freep(&h->slice_table_base);
@@ -2122,7 +2120,7 @@ static void free_tables(H264Context *h){
   */
  static int alloc_tables(H264Context *h){
      MpegEncContext * const s = &h->s;
-    const int big_mb_num= h->mb_stride * (s->mb_height+1);
+    const int big_mb_num= s->mb_stride * (s->mb_height+1);
      int x,y;
  
      CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
@@ -2130,13 +2128,13 @@ static int alloc_tables(H264Context *h){
      CHECKED_ALLOCZ(h->slice_table_base  , big_mb_num * sizeof(uint8_t))
  
      memset(h->slice_table_base, -1, big_mb_num  * sizeof(uint8_t));
-    h->slice_table= h->slice_table_base + h->mb_stride + 1;
+    h->slice_table= h->slice_table_base + s->mb_stride + 1;
  
      CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint16_t));
      CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint16_t));
      for(y=0; y<s->mb_height; y++){
          for(x=0; x<s->mb_width; x++){
-            const int mb_xy= x + y*h->mb_stride;
+            const int mb_xy= x + y*s->mb_stride;
              const int b_xy = 4*x + 4*y*h->b_stride;
              const int b8_xy= 2*x + 2*y*h->b8_stride;
          
@@ -2153,7 +2151,6 @@ fail:
  
  static void common_init(H264Context *h){
      MpegEncContext * const s = &h->s;
-    int i;
  
      s->width = s->avctx->width;
      s->height = s->avctx->height;
@@ -2161,6 +2158,7 @@ static void common_init(H264Context *h){
      
      init_pred_ptrs(h);
  
+    s->unrestricted_mv=1;
      s->decode=1; //FIXME
  }
  
@@ -2211,7 +2209,7 @@ static void hl_decode_mb(H264Context *h){
      MpegEncContext * const s = &h->s;
      const int mb_x= s->mb_x;
      const int mb_y= s->mb_y;
-    const int mb_xy= mb_x + mb_y*h->mb_stride;
+    const int mb_xy= mb_x + mb_y*s->mb_stride;
      const int mb_type= s->current_picture.mb_type[mb_xy];
      uint8_t  *dest_y, *dest_cb, *dest_cr;
      int linesize, uvlinesize /*dct_offset*/;
@@ -2262,15 +2260,22 @@ static void hl_decode_mb(H264Context *h){
                      }
  
                      h->pred4x4[ dir ](ptr, topright, linesize);
-                    if(h->non_zero_count_cache[ scan8[i] ])
-                        h264_add_idct_c(ptr, h->mb + i*16, linesize);
+                    if(h->non_zero_count_cache[ scan8[i] ]){
+                        if(s->codec_id == CODEC_ID_H264)
+                            h264_add_idct_c(ptr, h->mb + i*16, linesize);
+                        else
+                            svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
+                    }
                  }
              }
          }else{
              h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
-            h264_luma_dc_dequant_idct_c(h->mb, s->qscale);
+            if(s->codec_id == CODEC_ID_H264)
+                h264_luma_dc_dequant_idct_c(h->mb, s->qscale);
+            else
+                svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
          }
-    }else{
+    }else if(s->codec_id == CODEC_ID_H264){
          hl_motion(h, dest_y, dest_cb, dest_cr,
                    s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab, 
                    s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab);
@@ -2278,10 +2283,19 @@ static void hl_decode_mb(H264Context *h){
  
  
      if(!IS_INTRA4x4(mb_type)){
-        for(i=0; i<16; i++){
-            if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
-                uint8_t * const ptr= dest_y + h->block_offset[i];
-                h264_add_idct_c(ptr, h->mb + i*16, linesize);
+        if(s->codec_id == CODEC_ID_H264){
+            for(i=0; i<16; i++){
+                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
+                    uint8_t * const ptr= dest_y + h->block_offset[i];
+                    h264_add_idct_c(ptr, h->mb + i*16, linesize);
+                }
+            }
+        }else{
+            for(i=0; i<16; i++){
+                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
+                    uint8_t * const ptr= dest_y + h->block_offset[i];
+                    svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
+                }
              }
          }
      }
@@ -2289,16 +2303,31 @@ static void hl_decode_mb(H264Context *h){
      if(!(s->flags&CODEC_FLAG_GRAY)){
          chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp);
          chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp);
-        for(i=16; i<16+4; i++){
-            if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
-                uint8_t * const ptr= dest_cb + h->block_offset[i];
-                h264_add_idct_c(ptr, h->mb + i*16, uvlinesize);
+        if(s->codec_id == CODEC_ID_H264){
+            for(i=16; i<16+4; i++){
+                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
+                    uint8_t * const ptr= dest_cb + h->block_offset[i];
+                    h264_add_idct_c(ptr, h->mb + i*16, uvlinesize);
+                }
              }
-        }
-        for(i=20; i<20+4; i++){
-            if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
-                uint8_t * const ptr= dest_cr + h->block_offset[i];
-                h264_add_idct_c(ptr, h->mb + i*16, uvlinesize);
+            for(i=20; i<20+4; i++){
+                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
+                    uint8_t * const ptr= dest_cr + h->block_offset[i];
+                    h264_add_idct_c(ptr, h->mb + i*16, uvlinesize);
+                }
+            }
+        }else{
+            for(i=16; i<16+4; i++){
+                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
+                    uint8_t * const ptr= dest_cb + h->block_offset[i];
+                    svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
+                }
+            }
+            for(i=20; i<20+4; i++){
+                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
+                    uint8_t * const ptr= dest_cr + h->block_offset[i];
+                    svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
+                }
              }
          }
      }
@@ -2419,7 +2448,7 @@ static int decode_ref_pic_list_reordering(H264Context *h){
                  
                  
                  if(index >= h->ref_count[list]){
-                    fprintf(stderr, "reference count overflow\n");
+                    av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
                      return -1;
                  }
                  
@@ -2428,7 +2457,7 @@ static int decode_ref_pic_list_reordering(H264Context *h){
                          const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
  
                          if(abs_diff_pic_num >= h->max_pic_num){
-                            fprintf(stderr, "abs_diff_pic_num overflow\n");
+                            av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
                              return -1;
                          }
  
@@ -2450,7 +2479,7 @@ static int decode_ref_pic_list_reordering(H264Context *h){
                      }
  
                      if(i < index){
-                        fprintf(stderr, "reference picture missing during reorder\n");
+                        av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
                          memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
                      }else if(i > index){
                          Picture tmp= h->ref_list[list][i];
@@ -2462,7 +2491,7 @@ static int decode_ref_pic_list_reordering(H264Context *h){
                  }else if(reordering_of_pic_nums_idc==3) 
                      break;
                  else{
-                    fprintf(stderr, "illegal reordering_of_pic_nums_idc\n");
+                    av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
                      return -1;
                  }
              }
@@ -2532,12 +2561,12 @@ static Picture * remove_short(H264Context *h, int frame_num){
      int i;
      
      if(s->avctx->debug&FF_DEBUG_MMCO)
-        printf("remove short %d count %d\n", frame_num, h->short_ref_count);
+        av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
      
      for(i=0; i<h->short_ref_count; i++){
          Picture *pic= h->short_ref[i];
          if(s->avctx->debug&FF_DEBUG_MMCO)
-            printf("%d %d %X\n", i, pic->frame_num, (int)pic);
+            av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
          if(pic->frame_num == frame_num){
              h->short_ref[i]= NULL;
              memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
@@ -2576,11 +2605,11 @@ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
      Picture *pic;
      
      if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
-        printf("no mmco here\n");
+        av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
          
      for(i=0; i<mmco_count; i++){
          if(s->avctx->debug&FF_DEBUG_MMCO)
-            printf("mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
+            av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
  
          switch(mmco[i].opcode){
          case MMCO_SHORT2UNUSED:
@@ -2638,7 +2667,7 @@ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
          pic= remove_short(h, s->current_picture_ptr->frame_num);
          if(pic){
              pic->reference=0;
-            fprintf(stderr, "illegal short term buffer state detected\n");
+            av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
          }
          
          if(h->short_ref_count)
@@ -2681,13 +2710,13 @@ static int decode_ref_pic_marking(H264Context *h){
                  if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
                      h->mmco[i].long_index= get_ue_golomb(&s->gb);
                      if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){
-                        fprintf(stderr, "illegal long ref in memory management control operation %d\n", opcode);
+                        av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
                          return -1;
                      }
                  }
                      
                  if(opcode > MMCO_LONG){
-                    fprintf(stderr, "illegal memory management control operation %d\n", opcode);
+                    av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
                      return -1;
                  }
              }
@@ -2800,7 +2829,6 @@ static int decode_slice_header(H264Context *h){
      int first_mb_in_slice, pps_id;
      int num_ref_idx_active_override_flag;
      static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
-    float new_aspect;
  
      s->current_picture.reference= h->nal_ref_idc != 0;
  
@@ -2808,7 +2836,7 @@ static int decode_slice_header(H264Context *h){
  
      h->slice_type= get_ue_golomb(&s->gb);
      if(h->slice_type > 9){
-        fprintf(stderr, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
+        av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
      }
      if(h->slice_type > 4){
          h->slice_type -= 5;
@@ -2822,15 +2850,23 @@ static int decode_slice_header(H264Context *h){
          
      pps_id= get_ue_golomb(&s->gb);
      if(pps_id>255){
-        fprintf(stderr, "pps_id out of range\n");
+        av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
          return -1;
      }
      h->pps= h->pps_buffer[pps_id];
+    if(h->pps.slice_group_count == 0){
+        av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
+        return -1;
+    }
+
      h->sps= h->sps_buffer[ h->pps.sps_id ];
+    if(h->sps.log2_max_frame_num == 0){
+        av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
+        return -1;
+    }
      
      s->mb_width= h->sps.mb_width;
      s->mb_height= h->sps.mb_height;
-    h->mb_stride= s->mb_width + 1;
      
      h->b_stride=  s->mb_width*4;
      h->b8_stride= s->mb_width*2;
@@ -2838,24 +2874,14 @@ static int decode_slice_header(H264Context *h){
      s->mb_x = first_mb_in_slice % s->mb_width;
      s->mb_y = first_mb_in_slice / s->mb_width; //FIXME AFFW
      
-    s->width = 16*s->mb_width - 2*(h->pps.crop_left + h->pps.crop_right );
+    s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
      if(h->sps.frame_mbs_only_flag)
-        s->height= 16*s->mb_height - 2*(h->pps.crop_top  + h->pps.crop_bottom);
+        s->height= 16*s->mb_height - 2*(h->sps.crop_top  + h->sps.crop_bottom);
      else
-        s->height= 16*s->mb_height - 4*(h->pps.crop_top  + h->pps.crop_bottom); //FIXME recheck
+        s->height= 16*s->mb_height - 4*(h->sps.crop_top  + h->sps.crop_bottom); //FIXME recheck
      
-    if(h->pps.crop_left || h->pps.crop_top){
-        fprintf(stderr, "insane croping not completly supported, this could look slightly wrong ...\n");
-    }
-
-    if(s->aspected_height) //FIXME emms at end of slice ?
-        new_aspect= h->sps.sar_width*s->width / (float)(s->height*h->sps.sar_height);
-    else
-        new_aspect=0;
-
      if (s->context_initialized 
-        && (   s->width != s->avctx->width || s->height != s->avctx->height 
-            || ABS(new_aspect - s->avctx->aspect_ratio) > 0.001)) {
+        && (   s->width != s->avctx->width || s->height != s->avctx->height)) {
          free_tables(h);
          MPV_common_end(s);
      }
@@ -2867,7 +2893,7 @@ static int decode_slice_header(H264Context *h){
  
          s->avctx->width = s->width;
          s->avctx->height = s->height;
-        s->avctx->aspect_ratio= new_aspect;
+        s->avctx->sample_aspect_ratio= h->sps.sar;
      }
  
      if(first_mb_in_slice == 0){
@@ -2895,7 +2921,7 @@ static int decode_slice_header(H264Context *h){
      }
          
      if(h->nal_unit_type == NAL_IDR_SLICE){
-        int idr_pic_id= get_ue_golomb(&s->gb);
+        get_ue_golomb(&s->gb); /* idr_pic_id */
      }
     
      if(h->sps.poc_type==0){
@@ -2935,7 +2961,7 @@ static int decode_slice_header(H264Context *h){
                  h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
  
              if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
-                fprintf(stderr, "reference overflow\n");
+                av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
                  return -1;
              }
          }
@@ -2958,10 +2984,10 @@ static int decode_slice_header(H264Context *h){
      s->qscale = h->pps.init_qp + get_se_golomb(&s->gb); //slice_qp_delta
      //FIXME qscale / qp ... stuff
      if(h->slice_type == SP_TYPE){
-        int sp_for_switch_flag= get_bits1(&s->gb);
+        get_bits1(&s->gb); /* sp_for_switch_flag */
      }
      if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
-        int slice_qs_delta= get_se_golomb(&s->gb);
+        get_se_golomb(&s->gb); /* slice_qs_delta */
      }
  
      if( h->pps.deblocking_filter_parameters_present ) {
@@ -2979,9 +3005,9 @@ static int decode_slice_header(H264Context *h){
  #endif
  
      if(s->avctx->debug&FF_DEBUG_PICT_INFO){
-        printf("mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d\n", 
+        av_log(h->s.avctx, AV_LOG_DEBUG, "mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d\n", 
                 first_mb_in_slice, 
-               ff_get_pict_type_char(h->slice_type),
+               av_get_pict_type_char(h->slice_type),
                 pps_id, h->frame_num,
                 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
                 h->ref_count[0], h->ref_count[1],
@@ -3081,7 +3107,7 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
              level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
              if(suffix_length==0) level_code+=15; //FIXME doesnt make (much)sense
          }else{
-            fprintf(stderr, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
+            av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
              return -1;
          }
  
@@ -3122,7 +3148,7 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
      }
  
      if(zeros_left<0){
-        fprintf(stderr, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
+        av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
          return -1;
      }
      
@@ -3162,28 +3188,21 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
   */
  static int decode_mb(H264Context *h){
      MpegEncContext * const s = &h->s;
-    const int mb_xy= s->mb_x + s->mb_y*h->mb_stride;
+    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
      int mb_type, partition_count, cbp;
  
-    memset(h->mb, 0, sizeof(int16_t)*24*16); //FIXME avoid if allready clear (move after skip handlong?
+    s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handling?)
  
      tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
-
+    cbp = 0; /* avoid warning. FIXME: find a solution without slowing
+                down the code */
      if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
          if(s->mb_skip_run==-1)
              s->mb_skip_run= get_ue_golomb(&s->gb);
          
          if (s->mb_skip_run--) {
-            int i, mx, my;
+            int mx, my;
              /* skip mb */
-#if 0 //FIXME
-            for(i=0;i<6;i++)
-                s->block_last_index[i] = -1;
-         s->mv_type = MV_TYPE_16X16;
-            /* if P type, zero motion vector is implied */
-            s->mv_dir = MV_DIR_FORWARD;
-            s->mb_skiped = 1;
-#endif
  //FIXME b frame
              mb_type= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0;
  
@@ -3200,7 +3219,7 @@ static int decode_mb(H264Context *h){
              fill_caches(h, mb_type); //FIXME check what is needed and what not ...
              pred_pskip_motion(h, &mx, &my);
              fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
-            fill_rectangle(  h->mv_cache[0][scan8[0]], 4, 4, 8, (mx&0xFFFF)+(my<<16), 4);
+            fill_rectangle(  h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
              write_back_motion(h, mb_type);
  
              s->current_picture.mb_type[mb_xy]= mb_type; //FIXME SKIP type
@@ -3239,7 +3258,7 @@ static int decode_mb(H264Context *h){
         assert(h->slice_type == I_TYPE);
  decode_intra_mb:
          if(mb_type > 25){
-            fprintf(stderr, "mb_type %d in %c slice to large at %d %d\n", mb_type, ff_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
+            av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice to large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
              return -1;
          }
          partition_count=0;
@@ -3256,7 +3275,7 @@ decode_intra_mb:
      
      if(IS_INTRA_PCM(mb_type)){
          const uint8_t *ptr;
-        int x, y, i;
+        int x, y;
          
          // we assume these blocks are very rare so we dont optimize it
          align_get_bits(&s->gb);
@@ -3335,7 +3354,7 @@ decode_intra_mb:
              for(i=0; i<4; i++){
                  h->sub_mb_type[i]= get_ue_golomb(&s->gb);
                  if(h->sub_mb_type[i] >=13){
-                    fprintf(stderr, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
+                    av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
                      return -1;
                  }
                  sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
@@ -3346,7 +3365,7 @@ decode_intra_mb:
              for(i=0; i<4; i++){
                  h->sub_mb_type[i]= get_ue_golomb(&s->gb);
                  if(h->sub_mb_type[i] >=4){
-                    fprintf(stderr, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
+                    av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
                      return -1;
                  }
                  sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
@@ -3430,7 +3449,7 @@ decode_intra_mb:
                      my += get_se_golomb(&s->gb);
                      tprintf("final mv:%d %d\n", mx, my);
  
-                    fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, (mx&0xFFFF) + (my<<16), 4);
+                    fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
                  }
              }
          }
@@ -3453,7 +3472,7 @@ decode_intra_mb:
                          my += get_se_golomb(&s->gb);
                          tprintf("final mv:%d %d\n", mx, my);
  
-                        fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (mx&0xFFFF) + (my<<16), 4);
+                        fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
                      }
                  }
              }
@@ -3477,7 +3496,7 @@ decode_intra_mb:
                          my += get_se_golomb(&s->gb);
                          tprintf("final mv:%d %d\n", mx, my);
  
-                        fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (mx&0xFFFF) + (my<<16), 4);
+                        fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
                      }
                  }
              }
@@ -3490,7 +3509,7 @@ decode_intra_mb:
      if(!IS_INTRA16x16(mb_type)){
          cbp= get_ue_golomb(&s->gb);
          if(cbp > 47){
-            fprintf(stderr, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
+            av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
              return -1;
          }
          
@@ -3519,7 +3538,7 @@ decode_intra_mb:
          dquant= get_se_golomb(&s->gb);
  
          if( dquant > 25 || dquant < -26 ){
-            fprintf(stderr, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
+            av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
              return -1;
          }
          
@@ -3547,7 +3566,7 @@ decode_intra_mb:
                      }
                  }
              }else{
-                memset(&h->non_zero_count_cache[8], 0, 8*4); //FIXME stupid & slow
+                fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
              }
          }else{
              for(i8x8=0; i8x8<4; i8x8++){
@@ -3616,7 +3635,7 @@ static int decode_slice(H264Context *h){
          }
  
          if(ret<0){
-            fprintf(stderr, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
+            av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
              ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
  
              return -1;
@@ -3710,18 +3729,17 @@ static inline int decode_vui_parameters(H264Context *h, SPS *sps){
      if( aspect_ratio_info_present_flag ) {
          aspect_ratio_idc= get_bits(&s->gb, 8);
          if( aspect_ratio_idc == EXTENDED_SAR ) {
-            sps->sar_width= get_bits(&s->gb, 16);
-            sps->sar_height= get_bits(&s->gb, 16);
+            sps->sar.num= get_bits(&s->gb, 16);
+            sps->sar.den= get_bits(&s->gb, 16);
          }else if(aspect_ratio_idc < 16){
-            sps->sar_width=  pixel_aspect[aspect_ratio_idc][0];
-            sps->sar_height= pixel_aspect[aspect_ratio_idc][1];
+            sps->sar=  pixel_aspect[aspect_ratio_idc];
          }else{
-            fprintf(stderr, "illegal aspect ratio\n");
+            av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
              return -1;
          }
      }else{
-        sps->sar_width= 
-        sps->sar_height= 0;
+        sps->sar.num= 
+        sps->sar.den= 0;
      }
  //            s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
  #if 0
@@ -3777,28 +3795,23 @@ static inline int decode_vui_parameters(H264Context *h, SPS *sps){
  
  static inline int decode_seq_parameter_set(H264Context *h){
      MpegEncContext * const s = &h->s;
-    int profile_idc, level_idc, multiple_slice_groups, arbitrary_slice_order, redundant_slices;
+    int profile_idc, level_idc;
      int sps_id, i;
      SPS *sps;
      
      profile_idc= get_bits(&s->gb, 8);
+    get_bits1(&s->gb);   //constraint_set0_flag
+    get_bits1(&s->gb);   //constraint_set1_flag
+    get_bits1(&s->gb);   //constraint_set2_flag
+    get_bits(&s->gb, 5); // reserved
      level_idc= get_bits(&s->gb, 8);
-    multiple_slice_groups= get_bits1(&s->gb);
-    arbitrary_slice_order= get_bits1(&s->gb);
-    redundant_slices= get_bits1(&s->gb);
-    
      sps_id= get_ue_golomb(&s->gb);
      
      sps= &h->sps_buffer[ sps_id ];
-    
      sps->profile_idc= profile_idc;
      sps->level_idc= level_idc;
-    sps->multiple_slice_groups= multiple_slice_groups;
-    sps->arbitrary_slice_order= arbitrary_slice_order;
-    sps->redundant_slices= redundant_slices;
      
      sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
-
      sps->poc_type= get_ue_golomb(&s->gb);
      
      if(sps->poc_type == 0){ //FIXME #define
@@ -3813,12 +3826,12 @@ static inline int decode_seq_parameter_set(H264Context *h){
              sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
      }
      if(sps->poc_type > 2){
-        fprintf(stderr, "illegal POC type %d\n", sps->poc_type);
+        av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
          return -1;
      }
  
      sps->ref_frame_count= get_ue_golomb(&s->gb);
-    sps->required_frame_num_update_behaviour_flag= get_bits1(&s->gb);
+    sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
      sps->mb_width= get_ue_golomb(&s->gb) + 1;
      sps->mb_height= get_ue_golomb(&s->gb) + 1;
      sps->frame_mbs_only_flag= get_bits1(&s->gb);
@@ -3829,18 +3842,36 @@ static inline int decode_seq_parameter_set(H264Context *h){
  
      sps->direct_8x8_inference_flag= get_bits1(&s->gb);
  
+    sps->crop= get_bits1(&s->gb);
+    if(sps->crop){
+        sps->crop_left  = get_ue_golomb(&s->gb);
+        sps->crop_right = get_ue_golomb(&s->gb);
+        sps->crop_top   = get_ue_golomb(&s->gb);
+        sps->crop_bottom= get_ue_golomb(&s->gb);
+        if(sps->crop_left || sps->crop_top){
+            av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completly supported, this could look slightly wrong ...\n");
+        }
+    }else{
+        sps->crop_left  = 
+        sps->crop_right = 
+        sps->crop_top   = 
+        sps->crop_bottom= 0;
+    }
+
      sps->vui_parameters_present_flag= get_bits1(&s->gb);
      if( sps->vui_parameters_present_flag )
          decode_vui_parameters(h, sps);
      
      if(s->avctx->debug&FF_DEBUG_PICT_INFO){
-        printf("sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s %s\n", 
+        av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n", 
                 sps_id, sps->profile_idc, sps->level_idc,
                 sps->poc_type,
                 sps->ref_frame_count,
                 sps->mb_width, sps->mb_height,
                 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
                 sps->direct_8x8_inference_flag ? "8B8" : "",
+               sps->crop_left, sps->crop_right, 
+               sps->crop_top, sps->crop_bottom, 
                 sps->vui_parameters_present_flag ? "VUI" : ""
                 );
      }
@@ -3858,7 +3889,7 @@ static inline int decode_picture_parameter_set(H264Context *h){
      pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
      if(pps->slice_group_count > 1 ){
          pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
-fprintf(stderr, "FMO not supported\n");
+        av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
          switch(pps->mb_slice_group_map_type){
          case 0:
  #if 0
@@ -3890,12 +3921,13 @@ fprintf(stderr, "FMO not supported\n");
  |)                                                  |   |        |
  |    slice_group_id[ i ]                            |1  |u(v)    |
  #endif
+            break;
          }
      }
      pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
      pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
      if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
-        fprintf(stderr, "reference overflow (pps)\n");
+        av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
          return -1;
      }
      
@@ -3907,21 +3939,9 @@ fprintf(stderr, "FMO not supported\n");
      pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
      pps->constrained_intra_pred= get_bits1(&s->gb);
      pps->redundant_pic_cnt_present = get_bits1(&s->gb);
-    pps->crop= get_bits1(&s->gb);
-    if(pps->crop){
-        pps->crop_left  = get_ue_golomb(&s->gb);
-        pps->crop_right = get_ue_golomb(&s->gb);
-        pps->crop_top   = get_ue_golomb(&s->gb);
-        pps->crop_bottom= get_ue_golomb(&s->gb);
-    }else{
-        pps->crop_left  = 
-        pps->crop_right = 
-        pps->crop_top   = 
-        pps->crop_bottom= 0;
-    }
      
      if(s->avctx->debug&FF_DEBUG_PICT_INFO){
-        printf("pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s crop:%d/%d/%d/%d\n", 
+        av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s\n", 
                 pps_id, pps->sps_id,
                 pps->cabac ? "CABAC" : "CAVLC",
                 pps->slice_group_count,
@@ -3930,9 +3950,7 @@ fprintf(stderr, "FMO not supported\n");
                 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
                 pps->deblocking_filter_parameters_present ? "LPAR" : "",
                 pps->constrained_intra_pred ? "CONSTR" : "",
-               pps->redundant_pic_cnt_present ? "REDU" : "",
-               pps->crop_left, pps->crop_right, 
-               pps->crop_top, pps->crop_bottom
+               pps->redundant_pic_cnt_present ? "REDU" : ""
                 );
      }
      
@@ -3945,7 +3963,7 @@ fprintf(stderr, "FMO not supported\n");
   */
  static int find_frame_end(MpegEncContext *s, uint8_t *buf, int buf_size){
      ParseContext *pc= &s->parse_context;
-    int last_addr, i;
+    int i;
      uint32_t state;
  //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
  //    mb_addr= pc->mb_addr - 1;
@@ -3964,15 +3982,15 @@ static int find_frame_end(MpegEncContext *s, uint8_t *buf, int buf_size){
      }
      
      pc->state= state;
-    return -1;
+    return END_NOT_FOUND;
  }
  
  static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
      MpegEncContext * const s = &h->s;
      AVCodecContext * const avctx= s->avctx;
      int buf_index=0;
+#if 0
      int i;
-#if 0    
      for(i=0; i<32; i++){
          printf("%X ", buf[i]);
      }
@@ -3999,7 +4017,7 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
          bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
  
          if(s->avctx->debug&FF_DEBUG_STARTCODE){
-            printf("NAL %d at %d length %d\n", h->nal_unit_type, buf_index, dst_length);
+            av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d length %d\n", h->nal_unit_type, buf_index, dst_length);
          }
          
          buf_index += consumed;
@@ -4035,8 +4053,8 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
          case NAL_DPC:
              init_get_bits(&h->inter_gb, ptr, bit_length);
              h->inter_gb_ptr= &h->inter_gb;
-            
-            if(h->redundant_pic_count==0)
+
+            if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning)
                  decode_slice(h);
              break;
          case NAL_SEI:
@@ -4067,6 +4085,8 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
          s->current_picture.key_frame= s->pict_type == I_TYPE;
      }
      
+    if(!s->current_picture_ptr) return buf_index; //no frame
+    
      h->prev_frame_num_offset= h->frame_num_offset;
      h->prev_frame_num= h->frame_num;
      if(s->current_picture_ptr->reference){
@@ -4108,10 +4128,10 @@ static int decode_frame(AVCodecContext *avctx,
      H264Context *h = avctx->priv_data;
      MpegEncContext *s = &h->s;
      AVFrame *pict = data; 
-    float new_aspect;
      int buf_index;
      
      s->flags= avctx->flags;
+    s->flags2= avctx->flags2;
  
      *data_size = 0;
     
@@ -4147,21 +4167,15 @@ static int decode_frame(AVCodecContext *avctx,
          *pict= *(AVFrame*)&s->last_picture;
      }
  #endif
+    if(!s->current_picture_ptr){
+        av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
+        return -1;
+    }
+
      *pict= *(AVFrame*)&s->current_picture; //FIXME 
+    ff_print_debug_info(s, pict);
      assert(pict->data[0]);
  //printf("out %d\n", (int)pict->data[0]);
-    if(avctx->debug&FF_DEBUG_QP){
-        int8_t *qtab= pict->qscale_table;
-        int x,y;
-        
-        for(y=0; y<s->mb_height; y++){
-            for(x=0; x<s->mb_width; x++){
-                printf("%2d ", qtab[x + y*s->mb_width]);
-            }
-            printf("\n");
-        }
-        printf("\n");
-    }
  #if 0 //?
  
      /* Return the Picture timestamp as the frame number */
@@ -4179,12 +4193,12 @@ static int decode_frame(AVCodecContext *avctx,
  #if 0
  static inline void fill_mb_avail(H264Context *h){
      MpegEncContext * const s = &h->s;
-    const int mb_xy= s->mb_x + s->mb_y*h->mb_stride;
+    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
  
      if(s->mb_y){
-        h->mb_avail[0]= s->mb_x                 && h->slice_table[mb_xy - h->mb_stride - 1] == h->slice_num;
-        h->mb_avail[1]=                            h->slice_table[mb_xy - h->mb_stride    ] == h->slice_num;
-        h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - h->mb_stride + 1] == h->slice_num;
+        h->mb_avail[0]= s->mb_x                 && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
+        h->mb_avail[1]=                            h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
+        h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
      }else{
          h->mb_avail[0]=
          h->mb_avail[1]=
@@ -4210,7 +4224,7 @@ int main(){
      
      dsputil_init(&dsp, &avctx);
  
-    init_put_bits(&pb, temp, SIZE, NULL, NULL);
+    init_put_bits(&pb, temp, SIZE);
      printf("testing unsigned exp golomb\n");
      for(i=0; i<COUNT; i++){
          START_TIMER
@@ -4235,7 +4249,7 @@ int main(){
      }
      
      
-    init_put_bits(&pb, temp, SIZE, NULL, NULL);
+    init_put_bits(&pb, temp, SIZE);
      printf("testing signed exp golomb\n");
      for(i=0; i<COUNT; i++){
          START_TIMER
@@ -4393,6 +4407,7 @@ AVCodec h264_decoder = {
      NULL,
      decode_end,
      decode_frame,
-    /*CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | */CODEC_CAP_TRUNCATED,
+    /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED,
  };
  
+#include "svq3.c"