matroskadec: simplify matroska_ebmlnum_sint()

[ffmpeg] / libavcodec / h264.c
diff --git a/libavcodec/h264.c b/libavcodec/h264.c

index 06d4947a6925dde1b719ca6160fd3be354490302..684d9a730cdc8f54ed4aabfd6fe1d8b99f5df297 100644 (file)
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -49,18 +49,34 @@
  #define DELAYED_PIC_REF 4
  
  static VLC coeff_token_vlc[4];
+static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
+static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
+
  static VLC chroma_dc_coeff_token_vlc;
+static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
+static const int chroma_dc_coeff_token_vlc_table_size = 256;
  
  static VLC total_zeros_vlc[15];
+static VLC_TYPE total_zeros_vlc_tables[15][512][2];
+static const int total_zeros_vlc_tables_size = 512;
+
  static VLC chroma_dc_total_zeros_vlc[3];
+static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
+static const int chroma_dc_total_zeros_vlc_tables_size = 8;
  
  static VLC run_vlc[6];
+static VLC_TYPE run_vlc_tables[6][8][2];
+static const int run_vlc_tables_size = 8;
+
  static VLC run7_vlc;
+static VLC_TYPE run7_vlc_table[96][2];
+static const int run7_vlc_table_size = 96;
  
  static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
  static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
  static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
  static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
+static Picture * remove_long(H264Context *h, int i, int ref_mask);
  
  static av_always_inline uint32_t pack16to32(int a, int b){
  #ifdef WORDS_BIGENDIAN
@@ -78,13 +94,19 @@ const uint8_t ff_div6[52]={
  0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
  };
  
+static const int left_block_options[4][8]={
+    {0,1,2,3,7,10,8,11},
+    {2,2,3,3,8,11,8,11},
+    {0,0,1,1,7,10,7,10},
+    {0,2,0,2,7,10,7,10}
+};
  
  static void fill_caches(H264Context *h, int mb_type, int for_deblock){
      MpegEncContext * const s = &h->s;
      const int mb_xy= h->mb_xy;
      int topleft_xy, top_xy, topright_xy, left_xy[2];
      int topleft_type, top_type, topright_type, left_type[2];
-    int left_block[8];
+    int * left_block;
      int topleft_partition= -1;
      int i;
  
@@ -100,14 +122,7 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){
      topleft_xy = top_xy - 1;
      topright_xy= top_xy + 1;
      left_xy[1] = left_xy[0] = mb_xy-1;
-    left_block[0]= 0;
-    left_block[1]= 1;
-    left_block[2]= 2;
-    left_block[3]= 3;
-    left_block[4]= 7;
-    left_block[5]= 10;
-    left_block[6]= 8;
-    left_block[7]= 11;
+    left_block = left_block_options[0];
      if(FRAME_MBAFF){
          const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
          const int top_pair_xy      = pair_xy     - s->mb_stride;
@@ -133,7 +148,7 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){
              topleft_xy -= s->mb_stride;
          } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
              topleft_xy += s->mb_stride;
-            // take topleft mv from the middle of the mb, as opposed to all other modes which use the bottom-right partition
+            // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
              topleft_partition = 0;
          }
          if (bottom
@@ -146,34 +161,13 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){
              left_xy[1] = left_xy[0] = pair_xy - 1;
              if (curr_mb_frame_flag) {
                  if (bottom) {
-                    left_block[0]= 2;
-                    left_block[1]= 2;
-                    left_block[2]= 3;
-                    left_block[3]= 3;
-                    left_block[4]= 8;
-                    left_block[5]= 11;
-                    left_block[6]= 8;
-                    left_block[7]= 11;
+                    left_block = left_block_options[1];
                  } else {
-                    left_block[0]= 0;
-                    left_block[1]= 0;
-                    left_block[2]= 1;
-                    left_block[3]= 1;
-                    left_block[4]= 7;
-                    left_block[5]= 10;
-                    left_block[6]= 7;
-                    left_block[7]= 10;
+                    left_block= left_block_options[2];
                  }
              } else {
                  left_xy[1] += s->mb_stride;
-                //left_block[0]= 0;
-                left_block[1]= 2;
-                left_block[2]= 0;
-                left_block[3]= 2;
-                //left_block[4]= 7;
-                left_block[5]= 10;
-                left_block[6]= 7;
-                left_block[7]= 10;
+                left_block = left_block_options[3];
              }
          }
      }
@@ -292,7 +286,7 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){
  4 L . .L . . . .
  5 L . .. . . . .
  */
-//FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
+//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
      if(top_type){
          h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
          h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
@@ -673,7 +667,7 @@ static inline void write_back_non_zero_count(H264Context *h){
  }
  
  /**
- * gets the predicted number of non zero coefficients.
+ * gets the predicted number of non-zero coefficients.
   * @param n block index
   */
  static inline int pred_non_zero_count(H264Context *h, int n){
@@ -728,7 +722,7 @@ static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, in
              if(MB_FIELD
                 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
                 && i >= scan8[0]+8){
-                // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
+                // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
                  SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
              }
          }
@@ -894,13 +888,14 @@ static inline void pred_pskip_motion(H264Context * const h, int * const mx, int
  }
  
  static inline void direct_dist_scale_factor(H264Context * const h){
-    const int poc = h->s.current_picture_ptr->poc;
+    MpegEncContext * const s = &h->s;
+    const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
      const int poc1 = h->ref_list[1][0].poc;
      int i;
      for(i=0; i<h->ref_count[0]; i++){
          int poc0 = h->ref_list[0][i].poc;
          int td = av_clip(poc1 - poc0, -128, 127);
-        if(td == 0 /* FIXME || pic0 is a long-term ref */){
+        if(td == 0 || h->ref_list[0][i].long_ref){
              h->dist_scale_factor[i] = 256;
          }else{
              int tb = av_clip(poc - poc0, -128, 127);
@@ -920,23 +915,27 @@ static inline void direct_ref_list_init(H264Context * const h){
      Picture * const ref1 = &h->ref_list[1][0];
      Picture * const cur = s->current_picture_ptr;
      int list, i, j;
-    if(cur->pict_type == FF_I_TYPE)
-        cur->ref_count[0] = 0;
-    if(cur->pict_type != FF_B_TYPE)
-        cur->ref_count[1] = 0;
+    int sidx= s->picture_structure&1;
+    int ref1sidx= ref1->reference&1;
      for(list=0; list<2; list++){
-        cur->ref_count[list] = h->ref_count[list];
+        cur->ref_count[sidx][list] = h->ref_count[list];
          for(j=0; j<h->ref_count[list]; j++)
-            cur->ref_poc[list][j] = h->ref_list[list][j].poc;
+            cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
+    }
+    if(s->picture_structure == PICT_FRAME){
+        memcpy(cur->ref_count[0], cur->ref_count[1], sizeof(cur->ref_count[0]));
+        memcpy(cur->ref_poc  [0], cur->ref_poc  [1], sizeof(cur->ref_poc  [0]));
      }
      if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
          return;
      for(list=0; list<2; list++){
-        for(i=0; i<ref1->ref_count[list]; i++){
-            const int poc = ref1->ref_poc[list][i];
+        for(i=0; i<ref1->ref_count[ref1sidx][list]; i++){
+            int poc = ref1->ref_poc[ref1sidx][list][i];
+            if(((poc&3) == 3) != (s->picture_structure == PICT_FRAME))
+                poc= (poc&~3) + s->picture_structure;
              h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
              for(j=0; j<h->ref_count[list]; j++)
-                if(h->ref_list[list][j].poc == poc){
+                if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
                      h->map_col_to_list0[list][i] = j;
                      break;
                  }
@@ -944,7 +943,7 @@ static inline void direct_ref_list_init(H264Context * const h){
      }
      if(FRAME_MBAFF){
          for(list=0; list<2; list++){
-            for(i=0; i<ref1->ref_count[list]; i++){
+            for(i=0; i<ref1->ref_count[ref1sidx][list]; i++){
                  j = h->map_col_to_list0[list][i];
                  h->map_col_to_list0_field[list][2*i] = 2*j;
                  h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
@@ -955,9 +954,10 @@ static inline void direct_ref_list_init(H264Context * const h){
  
  static inline void pred_direct_motion(H264Context * const h, int *mb_type){
      MpegEncContext * const s = &h->s;
-    const int mb_xy =   h->mb_xy;
-    const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
-    const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
+    const int fieldoff= (s->picture_structure & h->ref_list[1][0].reference) ? 0 : (3-2*s->picture_structure);
+    const int mb_xy =   h->mb_xy + s->mb_stride*fieldoff;
+    const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride + 2*h->b8_stride*fieldoff;
+    const int b4_xy = 4*s->mb_x + 4*s->mb_y*h-> b_stride + 4*h-> b_stride*fieldoff;
      const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
      const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
      const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
@@ -999,7 +999,7 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){
              int refa = h->ref_cache[list][scan8[0] - 1];
              int refb = h->ref_cache[list][scan8[0] - 8];
              int refc = h->ref_cache[list][scan8[0] - 8 + 4];
-            if(refc == -2)
+            if(refc == PART_NOT_AVAILABLE)
                  refc = h->ref_cache[list][scan8[0] - 8 - 1];
              ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
              if(ref[list] < 0)
@@ -1152,8 +1152,7 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){
          const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
          const int *dist_scale_factor = h->dist_scale_factor;
  
-        if(FRAME_MBAFF){
-            if(IS_INTERLACED(*mb_type)){
+            if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
                  map_col_to_list0[0] = h->map_col_to_list0_field[0];
                  map_col_to_list0[1] = h->map_col_to_list0_field[1];
                  dist_scale_factor = h->dist_scale_factor_field;
@@ -1162,7 +1161,10 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){
                  /* FIXME assumes direct_8x8_inference == 1 */
                  const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
                  int mb_types_col[2];
+                int b8_stride = h->b8_stride;
+                int b4_stride = h->b_stride;
                  int y_shift;
+                int ref_shift;
  
                  *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
                           | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
@@ -1180,6 +1182,7 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){
                          l1mv1 -= 4*h->b_stride;
                      }
                      y_shift = 0;
+                    ref_shift= FRAME_MBAFF ? 0 : 1;
  
                      if(   (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
                         && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
@@ -1187,24 +1190,28 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){
                          *mb_type |= MB_TYPE_16x8;
                      else
                          *mb_type |= MB_TYPE_8x8;
+                    b8_stride *= 3;
+                    b4_stride *= 6;
                  }else{
-                    /* field to frame scaling */
-                    /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
-                     * but in MBAFF, top and bottom POC are equal */
-                    int dy = (s->mb_y&1) ? 1 : 2;
+                    int cur_poc = s->current_picture_ptr->poc;
+                    int *col_poc = h->ref_list[1]->field_poc;
+                    int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
+                    int dy = 2*col_parity - (s->mb_y&1);
                      mb_types_col[0] =
-                    mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
+                    mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy + col_parity*s->mb_stride];
                      l1ref0 += dy*h->b8_stride;
                      l1ref1 += dy*h->b8_stride;
                      l1mv0 += 2*dy*h->b_stride;
                      l1mv1 += 2*dy*h->b_stride;
                      y_shift = 2;
+                    ref_shift= FRAME_MBAFF ? 2 : 1;
  
                      if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
                         && !is_b8x8)
                          *mb_type |= MB_TYPE_16x16;
                      else
                          *mb_type |= MB_TYPE_8x8;
+                    b8_stride = 0;
                  }
  
                  for(i8=0; i8<4; i8++){
@@ -1225,18 +1232,18 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){
                          continue;
                      }
  
-                    ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
+                    ref0 = l1ref0[x8 + y8*b8_stride];
                      if(ref0 >= 0)
-                        ref0 = map_col_to_list0[0][ref0*2>>y_shift];
+                        ref0 = map_col_to_list0[0][ref0*2>>ref_shift];
                      else{
-                        ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
+                        ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride]*2>>ref_shift];
                          l1mv= l1mv1;
                      }
                      scale = dist_scale_factor[ref0];
                      fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
  
                      {
-                        const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
+                        const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
                          int my_col = (mv_col[1]<<y_shift)/2;
                          int mx = (scale * mv_col[0] + 128) >> 8;
                          int my = (scale * my_col + 128) >> 8;
@@ -1246,7 +1253,6 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){
                  }
                  return;
              }
-        }
  
          /* one-to-one mv scaling */
  
@@ -1454,7 +1460,7 @@ static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
  }
  
  /**
- * idct tranforms the 16 dc values and dequantize them.
+ * IDCT transforms the 16 dc values and dequantizes them.
   * @param qp quantization parameter
   */
  static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
@@ -1486,7 +1492,7 @@ static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
          const int z2= temp[4*1+i] - temp[4*3+i];
          const int z3= temp[4*1+i] + temp[4*3+i];
  
-        block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
+        block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
          block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
          block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
          block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
@@ -1495,7 +1501,7 @@ static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
  
  #if 0
  /**
- * dct tranforms the 16 dc values.
+ * DCT transforms the 16 dc values.
   * @param qp quantization parameter ??? FIXME
   */
  static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
@@ -1584,7 +1590,7 @@ static void chroma_dc_dct_c(DCTELEM *block){
   * gets the chroma qp.
   */
  static inline int get_chroma_qp(H264Context *h, int t, int qscale){
-    return h->pps.chroma_qp_table[t][qscale & 0xff];
+    return h->pps.chroma_qp_table[t][qscale];
  }
  
  //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
@@ -1684,7 +1690,7 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
      const int pic_width  = 16*s->mb_width;
      const int pic_height = 16*s->mb_height >> MB_FIELD;
  
-    if(!pic->data[0]) //FIXME this is unacceptable, some senseable error concealment must be done for missing reference frames
+    if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
          return;
  
      if(mx&7) extra_width -= 3;
@@ -1954,37 +1960,67 @@ static av_cold void decode_init_vlc(void){
  
      if (!done) {
          int i;
+        int offset;
          done = 1;
  
+        chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
+        chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
          init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
                   &chroma_dc_coeff_token_len [0], 1, 1,
-                 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
+                 &chroma_dc_coeff_token_bits[0], 1, 1,
+                 INIT_VLC_USE_NEW_STATIC);
  
+        offset = 0;
          for(i=0; i<4; i++){
+            coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
+            coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
              init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
                       &coeff_token_len [i][0], 1, 1,
-                     &coeff_token_bits[i][0], 1, 1, 1);
+                     &coeff_token_bits[i][0], 1, 1,
+                     INIT_VLC_USE_NEW_STATIC);
+            offset += coeff_token_vlc_tables_size[i];
          }
+        /*
+         * This is a one time safety check to make sure that
+         * the packed static coeff_token_vlc table sizes
+         * were initialized correctly.
+         */
+        assert(offset == sizeof(coeff_token_vlc_tables)/(sizeof(VLC_TYPE)*2));
  
          for(i=0; i<3; i++){
-            init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
+            chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
+            chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
+            init_vlc(&chroma_dc_total_zeros_vlc[i],
+                     CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
                       &chroma_dc_total_zeros_len [i][0], 1, 1,
-                     &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
+                     &chroma_dc_total_zeros_bits[i][0], 1, 1,
+                     INIT_VLC_USE_NEW_STATIC);
          }
          for(i=0; i<15; i++){
-            init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
+            total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
+            total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
+            init_vlc(&total_zeros_vlc[i],
+                     TOTAL_ZEROS_VLC_BITS, 16,
                       &total_zeros_len [i][0], 1, 1,
-                     &total_zeros_bits[i][0], 1, 1, 1);
+                     &total_zeros_bits[i][0], 1, 1,
+                     INIT_VLC_USE_NEW_STATIC);
          }
  
          for(i=0; i<6; i++){
-            init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
+            run_vlc[i].table = run_vlc_tables[i];
+            run_vlc[i].table_allocated = run_vlc_tables_size;
+            init_vlc(&run_vlc[i],
+                     RUN_VLC_BITS, 7,
                       &run_len [i][0], 1, 1,
-                     &run_bits[i][0], 1, 1, 1);
+                     &run_bits[i][0], 1, 1,
+                     INIT_VLC_USE_NEW_STATIC);
          }
+        run7_vlc.table = run7_vlc_table;
+        run7_vlc.table_allocated = run7_vlc_table_size;
          init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
                   &run_len [6][0], 1, 1,
-                 &run_bits[6][0], 1, 1, 1);
+                 &run_bits[6][0], 1, 1,
+                 INIT_VLC_USE_NEW_STATIC);
      }
  }
  
@@ -2213,6 +2249,7 @@ static av_cold int decode_init(AVCodecContext *avctx){
      }
  
      h->thread_context[0] = h;
+    h->outputed_poc = INT_MIN;
      return 0;
  }
  
@@ -2226,7 +2263,7 @@ static int frame_start(H264Context *h){
      /*
       * MPV_frame_start uses pict_type to derive key_frame.
       * This is incorrect for H.264; IDR markings must be used.
-     * Zero here; IDR markings per slice in frame or fields are OR'd in later.
+     * Zero here; IDR markings per slice in frame or fields are ORed in later.
       * See decode_nal_units().
       */
      s->current_picture_ptr->key_frame= 0;
@@ -2256,7 +2293,7 @@ static int frame_start(H264Context *h){
  
  //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
  
-    // We mark the current picture as non reference after allocating it, so
+    // We mark the current picture as non-reference after allocating it, so
      // that if we break out due to an error it can be released automatically
      // in the next MPV_frame_start().
      // SVQ3 as well as most other codecs have only last/next/current and thus
@@ -2264,6 +2301,11 @@ static int frame_start(H264Context *h){
      // mark frames as reference later "naturally".
      if(s->codec_id != CODEC_ID_SVQ3)
          s->current_picture_ptr->reference= 0;
+
+    s->current_picture_ptr->field_poc[0]=
+    s->current_picture_ptr->field_poc[1]= INT_MAX;
+    assert(s->current_picture_ptr->long_ref==0);
+
      return 0;
  }
  
@@ -2467,7 +2509,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
          linesize   = h->mb_linesize   = s->linesize * 2;
          uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
          block_offset = &h->block_offset[24];
-        if(mb_y&1){ //FIXME move out of this func?
+        if(mb_y&1){ //FIXME move out of this function?
              dest_y -= s->linesize*15;
              dest_cb-= s->uvlinesize*7;
              dest_cr-= s->uvlinesize*7;
@@ -2517,30 +2559,12 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
      }
  
      if (!simple && IS_INTRA_PCM(mb_type)) {
-        unsigned int x, y;
-
-        // The pixels are stored in h->mb array in the same order as levels,
-        // copy them in output in the correct order.
-        for(i=0; i<16; i++) {
-            for (y=0; y<4; y++) {
-                for (x=0; x<4; x++) {
-                    *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
-                }
-            }
+        for (i=0; i<16; i++) {
+            memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
          }
-        for(i=16; i<16+4; i++) {
-            for (y=0; y<4; y++) {
-                for (x=0; x<4; x++) {
-                    *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
-                }
-            }
-        }
-        for(i=20; i<20+4; i++) {
-            for (y=0; y<4; y++) {
-                for (x=0; x<4; x++) {
-                    *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
-                }
-            }
+        for (i=0; i<8; i++) {
+            memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4,  8);
+            memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4,  8);
          }
      } else {
          if(IS_INTRA(mb_type)){
@@ -2712,6 +2736,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
              tprintf(h->s.avctx, "call filter_mb\n");
              backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
              fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
+            h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
+            h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
              filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
          }
      }
@@ -2754,6 +2780,7 @@ static void pic_as_field(Picture *pic, const int parity){
          pic->reference = parity;
          pic->linesize[i] *= 2;
      }
+    pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
  }
  
  static int split_field_copy(Picture *dest, Picture *src,
@@ -2762,87 +2789,57 @@ static int split_field_copy(Picture *dest, Picture *src,
  
      if (match) {
          *dest = *src;
-        pic_as_field(dest, parity);
-        dest->pic_id *= 2;
-        dest->pic_id += id_add;
+        if(parity != PICT_FRAME){
+            pic_as_field(dest, parity);
+            dest->pic_id *= 2;
+            dest->pic_id += id_add;
+        }
      }
  
      return match;
  }
  
-/**
- * Split one reference list into field parts, interleaving by parity
- * as per H.264 spec section 8.2.4.2.5. Output fields have their data pointers
- * set to look at the actual start of data for that field.
- *
- * @param dest output list
- * @param dest_len maximum number of fields to put in dest
- * @param src the source reference list containing fields and/or field pairs
- *            (aka short_ref/long_ref, or
- *             refFrameListXShortTerm/refFrameListLongTerm in spec-speak)
- * @param src_len number of Picture's in source (pairs and unmatched fields)
- * @param parity the parity of the picture being decoded/needing
- *        these ref pics (PICT_{TOP,BOTTOM}_FIELD)
- * @return number of fields placed in dest
- */
-static int split_field_half_ref_list(Picture *dest, int dest_len,
-                                     Picture *src,  int src_len,  int parity){
-    int same_parity   = 1;
-    int same_i        = 0;
-    int opp_i         = 0;
-    int out_i;
-    int field_output;
-
-    for (out_i = 0; out_i < dest_len; out_i += field_output) {
-        if (same_parity && same_i < src_len) {
-            field_output = split_field_copy(dest + out_i, src + same_i,
-                                            parity, 1);
-            same_parity = !field_output;
-            same_i++;
-
-        } else if (opp_i < src_len) {
-            field_output = split_field_copy(dest + out_i, src + opp_i,
-                                            PICT_FRAME - parity, 0);
-            same_parity = field_output;
-            opp_i++;
+static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
+    int i[2]={0};
+    int index=0;
  
-        } else {
-            break;
+    while(i[0]<len || i[1]<len){
+        while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
+            i[0]++;
+        while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
+            i[1]++;
+        if(i[0] < len){
+            in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
+            split_field_copy(&def[index++], in[ i[0]++ ], sel  , 1);
+        }
+        if(i[1] < len){
+            in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
+            split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
          }
      }
  
-    return out_i;
+    return index;
  }
  
-/**
- * Split the reference frame list into a reference field list.
- * This implements H.264 spec 8.2.4.2.5 for a combined input list.
- * The input list contains both reference field pairs and
- * unmatched reference fields; it is ordered as spec describes
- * RefPicListX for frames in 8.2.4.2.1 and 8.2.4.2.3, except that
- * unmatched field pairs are also present. Conceptually this is equivalent
- * to concatenation of refFrameListXShortTerm with refFrameListLongTerm.
- *
- * @param dest output reference list where ordered fields are to be placed
- * @param dest_len max number of fields to place at dest
- * @param src source reference list, as described above
- * @param src_len number of pictures (pairs and unmatched fields) in src
- * @param parity parity of field being currently decoded
- *        (one of PICT_{TOP,BOTTOM}_FIELD)
- * @param long_i index into src array that holds first long reference picture,
- *        or src_len if no long refs present.
- */
-static int split_field_ref_list(Picture *dest, int dest_len,
-                                Picture *src,  int src_len,
-                                int parity,    int long_i){
+static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
+    int i, best_poc;
+    int out_i= 0;
  
-    int i = split_field_half_ref_list(dest, dest_len, src, long_i, parity);
-    dest += i;
-    dest_len -= i;
+    for(;;){
+        best_poc= dir ? INT_MIN : INT_MAX;
  
-    i += split_field_half_ref_list(dest, dest_len, src + long_i,
-                                   src_len - long_i, parity);
-    return i;
+        for(i=0; i<len; i++){
+            const int poc= src[i]->poc;
+            if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
+                best_poc= poc;
+                sorted[out_i]= src[i];
+            }
+        }
+        if(best_poc == (dir ? INT_MIN : INT_MAX))
+            break;
+        limit= sorted[out_i++]->poc - dir;
+    }
+    return out_i;
  }
  
  /**
@@ -2850,140 +2847,42 @@ static int split_field_ref_list(Picture *dest, int dest_len,
   */
  static int fill_default_ref_list(H264Context *h){
      MpegEncContext * const s = &h->s;
-    int i;
-    int smallest_poc_greater_than_current = -1;
-    int structure_sel;
-    Picture sorted_short_ref[32];
-    Picture field_entry_list[2][32];
-    Picture *frame_list[2];
-
-    if (FIELD_PICTURE) {
-        structure_sel = PICT_FRAME;
-        frame_list[0] = field_entry_list[0];
-        frame_list[1] = field_entry_list[1];
-    } else {
-        structure_sel = 0;
-        frame_list[0] = h->default_ref_list[0];
-        frame_list[1] = h->default_ref_list[1];
-    }
+    int i, len;
  
      if(h->slice_type_nos==FF_B_TYPE){
-        int list;
-        int len[2];
-        int short_len[2];
-        int out_i;
-        int limit= INT_MIN;
-
-        /* sort frame according to poc in B slice */
-        for(out_i=0; out_i<h->short_ref_count; out_i++){
-            int best_i=INT_MIN;
-            int best_poc=INT_MAX;
-
-            for(i=0; i<h->short_ref_count; i++){
-                const int poc= h->short_ref[i]->poc;
-                if(poc > limit && poc < best_poc){
-                    best_poc= poc;
-                    best_i= i;
-                }
-            }
-
-            assert(best_i != INT_MIN);
+        Picture *sorted[32];
+        int cur_poc, list;
+        int lens[2];
  
-            limit= best_poc;
-            sorted_short_ref[out_i]= *h->short_ref[best_i];
-            tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
-            if (-1 == smallest_poc_greater_than_current) {
-                if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
-                    smallest_poc_greater_than_current = out_i;
-                }
-            }
-        }
-
-        tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
-
-        // find the largest poc
-        for(list=0; list<2; list++){
-            int index = 0;
-            int j= -99;
-            int step= list ? -1 : 1;
-
-            for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
-                int sel;
-                while(j<0 || j>= h->short_ref_count){
-                    if(j != -99 && step == (list ? -1 : 1))
-                        return -1;
-                    step = -step;
-                    j= smallest_poc_greater_than_current + (step>>1);
-                }
-                sel = sorted_short_ref[j].reference | structure_sel;
-                if(sel != PICT_FRAME) continue;
-                frame_list[list][index  ]= sorted_short_ref[j];
-                frame_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
-            }
-            short_len[list] = index;
+        if(FIELD_PICTURE)
+            cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
+        else
+            cur_poc= s->current_picture_ptr->poc;
  
-            for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
-                int sel;
-                if(h->long_ref[i] == NULL) continue;
-                sel = h->long_ref[i]->reference | structure_sel;
-                if(sel != PICT_FRAME) continue;
+        for(list= 0; list<2; list++){
+            len= add_sorted(sorted    , h->short_ref, h->short_ref_count, cur_poc, 1^list);
+            len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
+            assert(len<=32);
+            len= build_def_list(h->default_ref_list[list]    , sorted     , len, 0, s->picture_structure);
+            len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
+            assert(len<=32);
  
-                frame_list[ list ][index  ]= *h->long_ref[i];
-                frame_list[ list ][index++].pic_id= i;
-            }
-            len[list] = index;
+            if(len < h->ref_count[list])
+                memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
+            lens[list]= len;
          }
  
-        for(list=0; list<2; list++){
-            if (FIELD_PICTURE)
-                len[list] = split_field_ref_list(h->default_ref_list[list],
-                                                 h->ref_count[list],
-                                                 frame_list[list],
-                                                 len[list],
-                                                 s->picture_structure,
-                                                 short_len[list]);
-
-            // swap the two first elements of L1 when L0 and L1 are identical
-            if(list && len[0] > 1 && len[0] == len[1])
-                for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0]; i++)
-                    if(i == len[0]){
-                        FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
-                        break;
-                    }
-
-            if(len[list] < h->ref_count[ list ])
-                memset(&h->default_ref_list[list][len[list]], 0, sizeof(Picture)*(h->ref_count[ list ] - len[list]));
+        if(lens[0] == lens[1] && lens[1] > 1){
+            for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
+            if(i == lens[0])
+                FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
          }
-
-
      }else{
-        int index=0;
-        int short_len;
-        for(i=0; i<h->short_ref_count; i++){
-            int sel;
-            sel = h->short_ref[i]->reference | structure_sel;
-            if(sel != PICT_FRAME) continue;
-            frame_list[0][index  ]= *h->short_ref[i];
-            frame_list[0][index++].pic_id= h->short_ref[i]->frame_num;
-        }
-        short_len = index;
-        for(i = 0; i < 16; i++){
-            int sel;
-            if(h->long_ref[i] == NULL) continue;
-            sel = h->long_ref[i]->reference | structure_sel;
-            if(sel != PICT_FRAME) continue;
-            frame_list[0][index  ]= *h->long_ref[i];
-            frame_list[0][index++].pic_id= i;
-        }
-
-        if (FIELD_PICTURE)
-            index = split_field_ref_list(h->default_ref_list[0],
-                                         h->ref_count[0], frame_list[0],
-                                         index, s->picture_structure,
-                                         short_len);
-
-        if(index < h->ref_count[0])
-            memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
+        len = build_def_list(h->default_ref_list[0]    , h->short_ref, h->short_ref_count, 0, s->picture_structure);
+        len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16                , 1, s->picture_structure);
+        assert(len <= 32);
+        if(len < h->ref_count[0])
+            memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
      }
  #ifdef TRACE
      for (i=0; i<h->ref_count[0]; i++) {
@@ -3031,7 +2930,6 @@ static int decode_ref_pic_list_reordering(H264Context *h){
  
      print_short_term(h);
      print_long_term(h);
-    if(h->slice_type_nos==FF_I_TYPE) return 0; //FIXME move before func
  
      for(list=0; list<h->list_count; list++){
          memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
@@ -3073,10 +2971,10 @@ static int decode_ref_pic_list_reordering(H264Context *h){
                              ref = h->short_ref[i];
                              assert(ref->reference);
                              assert(!ref->long_ref);
-                            if(ref->data[0] != NULL &&
+                            if(
                                     ref->frame_num == frame_num &&
-                                   (ref->reference & pic_structure) &&
-                                   ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
+                                   (ref->reference & pic_structure)
+                              )
                                  break;
                          }
                          if(i>=0)
@@ -3127,8 +3025,10 @@ static int decode_ref_pic_list_reordering(H264Context *h){
      }
      for(list=0; list<h->list_count; list++){
          for(index= 0; index < h->ref_count[list]; index++){
-            if(!h->ref_list[list][index].data[0])
-                h->ref_list[list][index]= s->current_picture;
+            if(!h->ref_list[list][index].data[0]){
+                av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
+                h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
+            }
          }
      }
  
@@ -3197,21 +3097,23 @@ static int pred_weight_table(H264Context *h){
                  h->luma_offset[list][i]= 0;
              }
  
-            chroma_weight_flag= get_bits1(&s->gb);
-            if(chroma_weight_flag){
-                int j;
-                for(j=0; j<2; j++){
-                    h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
-                    h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
-                    if(   h->chroma_weight[list][i][j] != chroma_def
-                       || h->chroma_offset[list][i][j] != 0)
-                        h->use_weight_chroma= 1;
-                }
-            }else{
-                int j;
-                for(j=0; j<2; j++){
-                    h->chroma_weight[list][i][j]= chroma_def;
-                    h->chroma_offset[list][i][j]= 0;
+            if(CHROMA){
+                chroma_weight_flag= get_bits1(&s->gb);
+                if(chroma_weight_flag){
+                    int j;
+                    for(j=0; j<2; j++){
+                        h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
+                        h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
+                        if(   h->chroma_weight[list][i][j] != chroma_def
+                        || h->chroma_offset[list][i][j] != 0)
+                            h->use_weight_chroma= 1;
+                    }
+                }else{
+                    int j;
+                    for(j=0; j<2; j++){
+                        h->chroma_weight[list][i][j]= chroma_def;
+                        h->chroma_offset[list][i][j]= 0;
+                    }
                  }
              }
          }
@@ -3289,18 +3191,19 @@ static void idr(H264Context *h){
      int i;
  
      for(i=0; i<16; i++){
-        if (h->long_ref[i] != NULL) {
-            unreference_pic(h, h->long_ref[i], 0);
-            h->long_ref[i]= NULL;
-        }
+        remove_long(h, i, 0);
      }
-    h->long_ref_count=0;
+    assert(h->long_ref_count==0);
  
      for(i=0; i<h->short_ref_count; i++){
          unreference_pic(h, h->short_ref[i], 0);
          h->short_ref[i]= NULL;
      }
      h->short_ref_count=0;
+    h->prev_frame_num= 0;
+    h->prev_frame_num_offset= 0;
+    h->prev_poc_msb=
+    h->prev_poc_lsb= 0;
  }
  
  /* forget old pics after a seek */
@@ -3361,7 +3264,7 @@ static void remove_short_at_index(H264Context *h, int i){
   *
   * @return the removed picture or NULL if an error occurs
   */
-static Picture * remove_short(H264Context *h, int frame_num){
+static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
      MpegEncContext * const s = &h->s;
      Picture *pic;
      int i;
@@ -3370,33 +3273,31 @@ static Picture * remove_short(H264Context *h, int frame_num){
          av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
  
      pic = find_short(h, frame_num, &i);
-    if (pic)
+    if (pic){
+        if(unreference_pic(h, pic, ref_mask))
          remove_short_at_index(h, i);
+    }
  
      return pic;
  }
  
  /**
   * Remove a picture from the long term reference list by its index in
- * that list.  This does no checking on the provided index; it is assumed
- * to be valid. The removed entry is set to NULL. Other entries are unaffected.
- * @param i index into h->long_ref of picture to remove.
- */
-static void remove_long_at_index(H264Context *h, int i){
-    h->long_ref[i]= NULL;
-    h->long_ref_count--;
-}
-
-/**
- *
+ * that list.
   * @return the removed picture or NULL if an error occurs
   */
-static Picture * remove_long(H264Context *h, int i){
+static Picture * remove_long(H264Context *h, int i, int ref_mask){
      Picture *pic;
  
      pic= h->long_ref[i];
-    if (pic)
-        remove_long_at_index(h, i);
+    if (pic){
+        if(unreference_pic(h, pic, ref_mask)){
+            assert(h->long_ref[i]->long_ref == 1);
+            h->long_ref[i]->long_ref= 0;
+            h->long_ref[i]= NULL;
+            h->long_ref_count--;
+        }
+    }
  
      return pic;
  }
@@ -3444,73 +3345,58 @@ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
          av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
  
      for(i=0; i<mmco_count; i++){
-        int structure, frame_num, unref_pic;
+        int structure, frame_num;
          if(s->avctx->debug&FF_DEBUG_MMCO)
              av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
  
+        if(   mmco[i].opcode == MMCO_SHORT2UNUSED
+           || mmco[i].opcode == MMCO_SHORT2LONG){
+            frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
+            pic = find_short(h, frame_num, &j);
+            if(!pic){
+                if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
+                   || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
+                av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
+                continue;
+            }
+        }
+
          switch(mmco[i].opcode){
          case MMCO_SHORT2UNUSED:
              if(s->avctx->debug&FF_DEBUG_MMCO)
                  av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
-            frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
-            pic = find_short(h, frame_num, &j);
-            if (pic) {
-                if (unreference_pic(h, pic, structure ^ PICT_FRAME))
-                    remove_short_at_index(h, j);
-            } else if(s->avctx->debug&FF_DEBUG_MMCO)
-                av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short failure\n");
+            remove_short(h, frame_num, structure ^ PICT_FRAME);
              break;
          case MMCO_SHORT2LONG:
-            if (FIELD_PICTURE && mmco[i].long_arg < h->long_ref_count &&
-                    h->long_ref[mmco[i].long_arg]->frame_num ==
-                                              mmco[i].short_pic_num / 2) {
-                /* do nothing, we've already moved this field pair. */
-            } else {
-                int frame_num = mmco[i].short_pic_num >> FIELD_PICTURE;
-
-                pic= remove_long(h, mmco[i].long_arg);
-                if(pic) unreference_pic(h, pic, 0);
+                if (h->long_ref[mmco[i].long_arg] != pic)
+                    remove_long(h, mmco[i].long_arg, 0);
  
-                h->long_ref[ mmco[i].long_arg ]= remove_short(h, frame_num);
+                remove_short_at_index(h, j);
+                h->long_ref[ mmco[i].long_arg ]= pic;
                  if (h->long_ref[ mmco[i].long_arg ]){
                      h->long_ref[ mmco[i].long_arg ]->long_ref=1;
                      h->long_ref_count++;
                  }
-            }
              break;
          case MMCO_LONG2UNUSED:
              j = pic_num_extract(h, mmco[i].long_arg, &structure);
              pic = h->long_ref[j];
              if (pic) {
-                if (unreference_pic(h, pic, structure ^ PICT_FRAME))
-                    remove_long_at_index(h, j);
+                remove_long(h, j, structure ^ PICT_FRAME);
              } else if(s->avctx->debug&FF_DEBUG_MMCO)
                  av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
              break;
          case MMCO_LONG:
-            unref_pic = 1;
-            if (FIELD_PICTURE && !s->first_field) {
-                if (h->long_ref[mmco[i].long_arg] == s->current_picture_ptr) {
-                    /* Just mark second field as referenced */
-                    unref_pic = 0;
-                } else if (s->current_picture_ptr->reference) {
+                    // Comment below left from previous code as it is an interesting note.
                      /* First field in pair is in short term list or
                       * at a different long term index.
-                     * This is not allowed; see 7.4.3, notes 2 and 3.
+                     * This is not allowed; see 7.4.3.3, notes 2 and 3.
                       * Report the problem and keep the pair where it is,
                       * and mark this field valid.
                       */
-                    av_log(h->s.avctx, AV_LOG_ERROR,
-                        "illegal long term reference assignment for second "
-                        "field in complementary field pair (first field is "
-                        "short term or has non-matching long index)\n");
-                    unref_pic = 0;
-                }
-            }
  
-            if (unref_pic) {
-                pic= remove_long(h, mmco[i].long_arg);
-                if(pic) unreference_pic(h, pic, 0);
+            if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
+                remove_long(h, mmco[i].long_arg, 0);
  
                  h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
                  h->long_ref[ mmco[i].long_arg ]->long_ref=1;
@@ -3524,32 +3410,34 @@ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
              assert(mmco[i].long_arg <= 16);
              // just remove the long term which index is greater than new max
              for(j = mmco[i].long_arg; j<16; j++){
-                pic = remove_long(h, j);
-                if (pic) unreference_pic(h, pic, 0);
+                remove_long(h, j, 0);
              }
              break;
          case MMCO_RESET:
              while(h->short_ref_count){
-                pic= remove_short(h, h->short_ref[0]->frame_num);
-                if(pic) unreference_pic(h, pic, 0);
+                remove_short(h, h->short_ref[0]->frame_num, 0);
              }
              for(j = 0; j < 16; j++) {
-                pic= remove_long(h, j);
-                if(pic) unreference_pic(h, pic, 0);
-            }
+                remove_long(h, j, 0);
+            }
+            s->current_picture_ptr->poc=
+            s->current_picture_ptr->field_poc[0]=
+            s->current_picture_ptr->field_poc[1]=
+            h->poc_lsb=
+            h->poc_msb=
+            h->frame_num=
+            s->current_picture_ptr->frame_num= 0;
              break;
          default: assert(0);
          }
      }
  
-    if (!current_ref_assigned && FIELD_PICTURE &&
-            !s->first_field && s->current_picture_ptr->reference) {
-
+    if (!current_ref_assigned) {
          /* Second field of complementary field pair; the first field of
           * which is already referenced. If short referenced, it
           * should be first entry in short_ref. If not, it must exist
           * in long_ref; trying to put it on the short list here is an
-         * error in the encoded bit stream (ref: 7.4.3, NOTE 2 and 3).
+         * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
           */
          if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
              /* Just mark the second field valid */
@@ -3560,30 +3448,18 @@ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
                                               "in complementary field pair "
                                               "(first field is long term)\n");
          } else {
-            /*
-             * First field in reference, but not in any sensible place on our
-             * reference lists. This shouldn't happen unless reference
-             * handling somewhere else is wrong.
-             */
-            assert(0);
-        }
-        current_ref_assigned = 1;
-    }
-
-    if(!current_ref_assigned){
-        pic= remove_short(h, s->current_picture_ptr->frame_num);
-        if(pic){
-            unreference_pic(h, pic, 0);
-            av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
-        }
+            pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
+            if(pic){
+                av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
+            }
  
-        if(h->short_ref_count)
-            memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
+            if(h->short_ref_count)
+                memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
  
-        h->short_ref[0]= s->current_picture_ptr;
-        h->short_ref[0]->long_ref=0;
-        h->short_ref_count++;
-        s->current_picture_ptr->reference |= s->picture_structure;
+            h->short_ref[0]= s->current_picture_ptr;
+            h->short_ref_count++;
+            s->current_picture_ptr->reference |= s->picture_structure;
+        }
      }
  
      if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
@@ -3596,19 +3472,17 @@ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
                 "number of reference frames exceeds max (probably "
                 "corrupt input), discarding one\n");
  
-        if (h->long_ref_count) {
+        if (h->long_ref_count && !h->short_ref_count) {
              for (i = 0; i < 16; ++i)
                  if (h->long_ref[i])
                      break;
  
              assert(i < 16);
-            pic = h->long_ref[i];
-            remove_long_at_index(h, i);
+            remove_long(h, i, 0);
          } else {
              pic = h->short_ref[h->short_ref_count - 1];
-            remove_short_at_index(h, h->short_ref_count - 1);
+            remove_short(h, pic->frame_num, 0);
          }
-        unreference_pic(h, pic, 0);
      }
  
      print_short_term(h);
@@ -3620,13 +3494,12 @@ static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
      MpegEncContext * const s = &h->s;
      int i;
  
+    h->mmco_index= 0;
      if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
          s->broken_link= get_bits1(gb) -1;
-        h->mmco[0].long_arg= get_bits1(gb) - 1; // current_long_term_idx
-        if(h->mmco[0].long_arg == -1)
-            h->mmco_index= 0;
-        else{
+        if(get_bits1(gb)){
              h->mmco[0].opcode= MMCO_LONG;
+            h->mmco[0].long_arg= 0;
              h->mmco_index= 1;
          }
      }else{
@@ -3673,8 +3546,7 @@ static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
                      h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
                      h->mmco_index= 2;
                  }
-            }else
-                h->mmco_index= 0;
+            }
          }
      }
  
@@ -3685,24 +3557,15 @@ static int init_poc(H264Context *h){
      MpegEncContext * const s = &h->s;
      const int max_frame_num= 1<<h->sps.log2_max_frame_num;
      int field_poc[2];
+    Picture *cur = s->current_picture_ptr;
  
-    if(h->nal_unit_type == NAL_IDR_SLICE){
-        h->frame_num_offset= 0;
-    }else{
-        if(h->frame_num < h->prev_frame_num)
-            h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
-        else
-            h->frame_num_offset= h->prev_frame_num_offset;
-    }
+    h->frame_num_offset= h->prev_frame_num_offset;
+    if(h->frame_num < h->prev_frame_num)
+        h->frame_num_offset += max_frame_num;
  
      if(h->sps.poc_type==0){
          const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
  
-        if(h->nal_unit_type == NAL_IDR_SLICE){
-             h->prev_poc_msb=
-             h->prev_poc_lsb= 0;
-        }
-
          if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
              h->poc_msb = h->prev_poc_msb + max_poc_lsb;
          else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
@@ -3749,29 +3612,20 @@ static int init_poc(H264Context *h){
          if(s->picture_structure == PICT_FRAME)
              field_poc[1] += h->delta_poc[1];
      }else{
-        int poc;
-        if(h->nal_unit_type == NAL_IDR_SLICE){
-            poc= 0;
-        }else{
-            if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
-            else               poc= 2*(h->frame_num_offset + h->frame_num) - 1;
-        }
+        int poc= 2*(h->frame_num_offset + h->frame_num);
+
+        if(!h->nal_ref_idc)
+            poc--;
+
          field_poc[0]= poc;
          field_poc[1]= poc;
      }
  
-    if(s->picture_structure != PICT_BOTTOM_FIELD) {
+    if(s->picture_structure != PICT_BOTTOM_FIELD)
          s->current_picture_ptr->field_poc[0]= field_poc[0];
-        s->current_picture_ptr->poc = field_poc[0];
-    }
-    if(s->picture_structure != PICT_TOP_FIELD) {
+    if(s->picture_structure != PICT_TOP_FIELD)
          s->current_picture_ptr->field_poc[1]= field_poc[1];
-        s->current_picture_ptr->poc = field_poc[1];
-    }
-    if(!FIELD_PICTURE || !s->first_field) {
-        Picture *cur = s->current_picture_ptr;
-        cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
-    }
+    cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
  
      return 0;
  }
@@ -3910,7 +3764,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
      h->slice_type= slice_type;
      h->slice_type_nos= slice_type & 3;
  
-    s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
+    s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
      if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
          av_log(h->s.avctx, AV_LOG_ERROR,
                 "B picture before any references, skipping\n");
@@ -3923,13 +3777,13 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
          return -1;
      }
      if(!h0->pps_buffers[pps_id]) {
-        av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
+        av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
          return -1;
      }
      h->pps= *h0->pps_buffers[pps_id];
  
      if(!h0->sps_buffers[h->pps.sps_id]) {
-        av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
+        av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
          return -1;
      }
      h->sps = *h0->sps_buffers[h->pps.sps_id];
@@ -4013,8 +3867,19 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
              h->mb_aff_frame = h->sps.mb_aff;
          }
      }
+    h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
  
      if(h0->current_slice == 0){
+        while(h->frame_num !=  h->prev_frame_num &&
+              h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
+            av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
+            frame_start(h);
+            h->prev_frame_num++;
+            h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
+            s->current_picture_ptr->frame_num= h->prev_frame_num;
+            execute_ref_pic_marking(h, NULL, 0);
+        }
+
          /* See if we have a decoded first field looking for a pair... */
          if (s0->first_field) {
              assert(s0->current_picture_ptr);
@@ -4110,7 +3975,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
          h->redundant_pic_count= get_ue_golomb(&s->gb);
      }
  
-    //set defaults, might be overriden a few line later
+    //set defaults, might be overridden a few lines later
      h->ref_count[0]= h->pps.ref_count[0];
      h->ref_count[1]= h->pps.ref_count[1];
  
@@ -4142,7 +4007,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
          fill_default_ref_list(h);
      }
  
-    if(decode_ref_pic_list_reordering(h) < 0)
+    if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
          return -1;
  
      if(   (h->pps.weighted_pred          && h->slice_type_nos == FF_P_TYPE )
@@ -4504,7 +4369,7 @@ static void decode_mb_skip(H264Context *h){
  
  /**
   * decodes a macroblock
- * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
+ * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
   */
  static int decode_mb_cavlc(H264Context *h){
      MpegEncContext * const s = &h->s;
@@ -4538,8 +4403,7 @@ static int decode_mb_cavlc(H264Context *h){
      if(FRAME_MBAFF){
          if( (s->mb_y&1) == 0 )
              h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
-    }else
-        h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
+    }
  
      h->prev_mb_skipped= 0;
  
@@ -4581,38 +4445,18 @@ decode_intra_mb:
      h->slice_table[ mb_xy ]= h->slice_num;
  
      if(IS_INTRA_PCM(mb_type)){
-        unsigned int x, y;
+        unsigned int x;
  
          // We assume these blocks are very rare so we do not optimize it.
          align_get_bits(&s->gb);
  
          // The pixels are stored in the same order as levels in h->mb array.
-        for(y=0; y<16; y++){
-            const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
-            for(x=0; x<16; x++){
-                tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
-                h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
-            }
-        }
-        for(y=0; y<8; y++){
-            const int index= 256 + 4*(y&3) + 32*(y>>2);
-            for(x=0; x<8; x++){
-                tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
-                h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
-            }
-        }
-        for(y=0; y<8; y++){
-            const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
-            for(x=0; x<8; x++){
-                tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
-                h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
-            }
+        for(x=0; x < (CHROMA ? 384 : 256); x++){
+            ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
          }
  
          // In deblocking, the quantizer is 0
          s->current_picture.qscale_table[mb_xy]= 0;
-        h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
-        h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
          // All coeffs are present
          memset(h->non_zero_count[mb_xy], 16, 16);
  
@@ -4629,43 +4473,44 @@ decode_intra_mb:
  
      //mb_pred
      if(IS_INTRA(mb_type)){
-            int pred_mode;
+        int pred_mode;
  //            init_top_left_availability(h);
-            if(IS_INTRA4x4(mb_type)){
-                int i;
-                int di = 1;
-                if(dct8x8_allowed && get_bits1(&s->gb)){
-                    mb_type |= MB_TYPE_8x8DCT;
-                    di = 4;
-                }
+        if(IS_INTRA4x4(mb_type)){
+            int i;
+            int di = 1;
+            if(dct8x8_allowed && get_bits1(&s->gb)){
+                mb_type |= MB_TYPE_8x8DCT;
+                di = 4;
+            }
  
  //                fill_intra4x4_pred_table(h);
-                for(i=0; i<16; i+=di){
-                    int mode= pred_intra_mode(h, i);
-
-                    if(!get_bits1(&s->gb)){
-                        const int rem_mode= get_bits(&s->gb, 3);
-                        mode = rem_mode + (rem_mode >= mode);
-                    }
+            for(i=0; i<16; i+=di){
+                int mode= pred_intra_mode(h, i);
  
-                    if(di==4)
-                        fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
-                    else
-                        h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
+                if(!get_bits1(&s->gb)){
+                    const int rem_mode= get_bits(&s->gb, 3);
+                    mode = rem_mode + (rem_mode >= mode);
                  }
-                write_back_intra_pred_mode(h);
-                if( check_intra4x4_pred_mode(h) < 0)
-                    return -1;
-            }else{
-                h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
-                if(h->intra16x16_pred_mode < 0)
-                    return -1;
-            }
  
+                if(di==4)
+                    fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
+                else
+                    h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
+            }
+            write_back_intra_pred_mode(h);
+            if( check_intra4x4_pred_mode(h) < 0)
+                return -1;
+        }else{
+            h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
+            if(h->intra16x16_pred_mode < 0)
+                return -1;
+        }
+        if(CHROMA){
              pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
              if(pred_mode < 0)
                  return -1;
              h->chroma_pred_mode= pred_mode;
+        }
      }else if(partition_count==4){
          int i, j, sub_partition_count[4], list, ref[2][4];
  
@@ -4871,16 +4716,21 @@ decode_intra_mb:
              return -1;
          }
  
-        if(IS_INTRA4x4(mb_type))
-            cbp= golomb_to_intra4x4_cbp[cbp];
-        else
-            cbp= golomb_to_inter_cbp[cbp];
+        if(CHROMA){
+            if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
+            else                     cbp= golomb_to_inter_cbp   [cbp];
+        }else{
+            if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
+            else                     cbp= golomb_to_inter_cbp_gray[cbp];
+        }
      }
      h->cbp = cbp;
  
      if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
-        if(get_bits1(&s->gb))
+        if(get_bits1(&s->gb)){
              mb_type |= MB_TYPE_8x8DCT;
+            h->cbp_table[mb_xy]= cbp;
+        }
      }
      s->current_picture.mb_type[mb_xy]= mb_type;
  
@@ -5526,10 +5376,10 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
          }
      }
  
-    while( coeff_count-- ) {
+    do {
          uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
  
-        int j= scantable[index[coeff_count]];
+        int j= scantable[index[--coeff_count]];
  
          if( get_cabac( CC, ctx ) == 0 ) {
              node_ctx = coeff_abs_level_transition[0][node_ctx];
@@ -5566,7 +5416,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
                  block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
              }
          }
-    }
+    } while( coeff_count );
  #ifdef CABAC_ON_STACK
              h->cabac.range     = cc.range     ;
              h->cabac.low       = cc.low       ;
@@ -5624,7 +5474,7 @@ static inline void compute_mb_neighbors(H264Context *h)
  
  /**
   * decodes a macroblock
- * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
+ * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
   */
  static int decode_mb_cabac(H264Context *h) {
      MpegEncContext * const s = &h->s;
@@ -5671,8 +5521,7 @@ static int decode_mb_cabac(H264Context *h) {
          if( (s->mb_y&1) == 0 )
              h->mb_mbaff =
              h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
-    }else
-        h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
+    }
  
      h->prev_mb_skipped = 0;
  
@@ -5715,7 +5564,6 @@ decode_intra_mb:
  
      if(IS_INTRA_PCM(mb_type)) {
          const uint8_t *ptr;
-        unsigned int x, y;
  
          // We assume these blocks are very rare so we do not optimize it.
          // FIXME The two following lines get the bitstream position in the cabac
@@ -5727,26 +5575,9 @@ decode_intra_mb:
          }
  
          // The pixels are stored in the same order as levels in h->mb array.
-        for(y=0; y<16; y++){
-            const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
-            for(x=0; x<16; x++){
-                tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
-                h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
-            }
-        }
-        for(y=0; y<8; y++){
-            const int index= 256 + 4*(y&3) + 32*(y>>2);
-            for(x=0; x<8; x++){
-                tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
-                h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
-            }
-        }
-        for(y=0; y<8; y++){
-            const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
-            for(x=0; x<8; x++){
-                tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
-                h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
-            }
+        memcpy(h->mb, ptr, 256); ptr+=256;
+        if(CHROMA){
+            memcpy(h->mb+128, ptr, 128); ptr+=128;
          }
  
          ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
@@ -5756,8 +5587,6 @@ decode_intra_mb:
          h->chroma_pred_mode_table[mb_xy] = 0;
          // In deblocking, the quantizer is 0
          s->current_picture.qscale_table[mb_xy]= 0;
-        h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
-        h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
          // All coeffs are present
          memset(h->non_zero_count[mb_xy], 16, 16);
          s->current_picture.mb_type[mb_xy]= mb_type;
@@ -5796,12 +5625,14 @@ decode_intra_mb:
              h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
              if( h->intra16x16_pred_mode < 0 ) return -1;
          }
-        h->chroma_pred_mode_table[mb_xy] =
-        pred_mode                        = decode_cabac_mb_chroma_pre_mode( h );
+        if(CHROMA){
+            h->chroma_pred_mode_table[mb_xy] =
+            pred_mode                        = decode_cabac_mb_chroma_pre_mode( h );
  
-        pred_mode= check_intra_pred_mode( h, pred_mode );
-        if( pred_mode < 0 ) return -1;
-        h->chroma_pred_mode= pred_mode;
+            pred_mode= check_intra_pred_mode( h, pred_mode );
+            if( pred_mode < 0 ) return -1;
+            h->chroma_pred_mode= pred_mode;
+        }
      } else if( partition_count == 4 ) {
          int i, j, sub_partition_count[4], list, ref[2][4];
  
@@ -6003,7 +5834,8 @@ decode_intra_mb:
  
      if( !IS_INTRA16x16( mb_type ) ) {
          cbp  = decode_cabac_mb_cbp_luma( h );
-        cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
+        if(CHROMA)
+            cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
      }
  
      h->cbp_table[mb_xy] = h->cbp = cbp;
@@ -6561,6 +6393,43 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
          }
      }
  
+    // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
+    if(!h->pps.cabac && h->pps.transform_8x8_mode){
+        int top_type, left_type[2];
+        top_type     = s->current_picture.mb_type[h->top_mb_xy]    ;
+        left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
+        left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
+
+        if(IS_8x8DCT(top_type)){
+            h->non_zero_count_cache[4+8*0]=
+            h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
+            h->non_zero_count_cache[6+8*0]=
+            h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
+        }
+        if(IS_8x8DCT(left_type[0])){
+            h->non_zero_count_cache[3+8*1]=
+            h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
+        }
+        if(IS_8x8DCT(left_type[1])){
+            h->non_zero_count_cache[3+8*3]=
+            h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
+        }
+
+        if(IS_8x8DCT(mb_type)){
+            h->non_zero_count_cache[scan8[0   ]]= h->non_zero_count_cache[scan8[1   ]]=
+            h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= h->cbp_table[mb_xy] & 1;
+
+            h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
+            h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp_table[mb_xy] & 2;
+
+            h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
+            h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp_table[mb_xy] & 4;
+
+            h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
+            h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp_table[mb_xy] & 8;
+        }
+    }
+
      if (FRAME_MBAFF
              // left mb is in picture
              && h->slice_table[mb_xy-1] != 255
@@ -6733,14 +6602,23 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
                      int b_idx= 8 + 4 + edge * (dir ? 8:1);
                      int bn_idx= b_idx - (dir ? 8:1);
                      int v = 0;
-                    int xn= h->slice_type_nos == FF_B_TYPE && ref2frm[0][h->ref_cache[0][b_idx]+2] != ref2frmn[0][h->ref_cache[0][bn_idx]+2];
  
                      for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
-                        int ln= l^xn;
-                        v |= ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
-                             FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
-                             FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
+                        v |= ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[l][h->ref_cache[l][bn_idx]+2] ||
+                             FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
+                             FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
                      }
+
+                    if(h->slice_type_nos == FF_B_TYPE && v){
+                        v=0;
+                        for( l = 0; !v && l < 2; l++ ) {
+                            int ln= 1-l;
+                            v |= ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
+                                FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
+                                FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
+                        }
+                    }
+
                      bS[0] = bS[1] = bS[2] = bS[3] = v;
                      mv_done = 1;
                  }
@@ -6759,17 +6637,28 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
                      }
                      else if(!mv_done)
                      {
-                        int xn= h->slice_type_nos == FF_B_TYPE && ref2frm[0][h->ref_cache[0][b_idx]+2] != ref2frmn[0][h->ref_cache[0][bn_idx]+2];
                          bS[i] = 0;
                          for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
-                            int ln= l^xn;
-                            if( ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
-                                FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
-                                FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
+                            if( ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[l][h->ref_cache[l][bn_idx]+2] ||
+                                FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
+                                FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
                                  bS[i] = 1;
                                  break;
                              }
                          }
+
+                        if(h->slice_type_nos == FF_B_TYPE && bS[i]){
+                            bS[i] = 0;
+                            for( l = 0; l < 2; l++ ) {
+                                int ln= 1-l;
+                                if( ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
+                                    FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
+                                    FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
+                                    bS[i] = 1;
+                                    break;
+                                }
+                            }
+                        }
                      }
                  }
  
@@ -7231,14 +7120,17 @@ static inline int decode_seq_parameter_set(H264Context *h){
      sps->level_idc= level_idc;
  
      if(sps->profile_idc >= 100){ //high profile
-        if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
+        sps->chroma_format_idc= get_ue_golomb(&s->gb);
+        if(sps->chroma_format_idc == 3)
              get_bits1(&s->gb);  //residual_color_transform_flag
          get_ue_golomb(&s->gb);  //bit_depth_luma_minus8
          get_ue_golomb(&s->gb);  //bit_depth_chroma_minus8
          sps->transform_bypass = get_bits1(&s->gb);
          decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
-    }else
+    }else{
          sps->scaling_matrix_present = 0;
+        sps->chroma_format_idc= 1;
+    }
  
      sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
      sps->poc_type= get_ue_golomb(&s->gb);
@@ -7320,7 +7212,7 @@ static inline int decode_seq_parameter_set(H264Context *h){
          decode_vui_parameters(h, sps);
  
      if(s->avctx->debug&FF_DEBUG_PICT_INFO){
-        av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
+        av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
                 sps_id, sps->profile_idc, sps->level_idc,
                 sps->poc_type,
                 sps->ref_frame_count,
@@ -7329,7 +7221,8 @@ static inline int decode_seq_parameter_set(H264Context *h){
                 sps->direct_8x8_inference_flag ? "8B8" : "",
                 sps->crop_left, sps->crop_right,
                 sps->crop_top, sps->crop_bottom,
-               sps->vui_parameters_present_flag ? "VUI" : ""
+               sps->vui_parameters_present_flag ? "VUI" : "",
+               ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
                 );
      }
      return 0;
@@ -7339,8 +7232,8 @@ static void
  build_qp_table(PPS *pps, int t, int index)
  {
      int i;
-    for(i = 0; i < 255; i++)
-        pps->chroma_qp_table[t][i & 0xff] = chroma_qp[av_clip(i + index, 0, 51)];
+    for(i = 0; i < 52; i++)
+        pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
  }
  
  static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
@@ -7430,11 +7323,9 @@ static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
      }
  
      build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
-    if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1]) {
-        build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
+    build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
+    if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
          h->pps.chroma_qp_diff= 1;
-    } else
-        memcpy(pps->chroma_qp_table[1], pps->chroma_qp_table[0], 256);
  
      if(s->avctx->debug&FF_DEBUG_PICT_INFO){
          av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
@@ -7662,7 +7553,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
          else if(err == 1) {
              /* Slice could not be decoded in parallel mode, copy down
               * NAL unit stuff to context 0 and restart. Note that
-             * rbsp_buffer is not transfered, but since we no longer
+             * rbsp_buffer is not transferred, but since we no longer
               * run in parallel mode this should not be an issue. */
              h->nal_unit_type = hx->nal_unit_type;
              h->nal_ref_idc   = hx->nal_ref_idc;
@@ -7679,17 +7570,10 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
   * returns the number of bytes consumed for building the current frame
   */
  static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
-    if(s->flags&CODEC_FLAG_TRUNCATED){
-        pos -= s->parse_context.last_index;
-        if(pos<0) pos=0; // FIXME remove (unneeded?)
-
-        return pos;
-    }else{
          if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
          if(pos+10>buf_size) pos=buf_size; // oops ;)
  
          return pos;
-    }
  }
  
  static int decode_frame(AVCodecContext *avctx,
@@ -7704,16 +7588,7 @@ static int decode_frame(AVCodecContext *avctx,
      s->flags= avctx->flags;
      s->flags2= avctx->flags2;
  
-    if(s->flags&CODEC_FLAG_TRUNCATED){
-        const int next= ff_h264_find_frame_end(h, buf, buf_size);
-        assert((buf_size > 0) || (next == END_NOT_FOUND));
-
-        if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
-          return buf_size;
-//printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
-    }
-
-   /* no supplementary picture */
+   /* end of stream, output what is still in the buffers */
      if (buf_size == 0) {
          Picture *out;
          int i, out_idx;
@@ -7721,7 +7596,7 @@ static int decode_frame(AVCodecContext *avctx,
  //FIXME factorize this with the output code below
          out = h->delayed_pic[0];
          out_idx = 0;
-        for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
+        for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
              if(h->delayed_pic[i]->poc < out->poc){
                  out = h->delayed_pic[i];
                  out_idx = i;
@@ -7804,13 +7679,13 @@ static int decode_frame(AVCodecContext *avctx,
          s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
          s->current_picture_ptr->pict_type= s->pict_type;
  
-        h->prev_frame_num_offset= h->frame_num_offset;
-        h->prev_frame_num= h->frame_num;
          if(!s->dropable) {
+            execute_ref_pic_marking(h, h->mmco, h->mmco_index);
              h->prev_poc_msb= h->poc_msb;
              h->prev_poc_lsb= h->poc_lsb;
-            execute_ref_pic_marking(h, h->mmco, h->mmco_index);
          }
+        h->prev_frame_num_offset= h->frame_num_offset;
+        h->prev_frame_num= h->frame_num;
  
          /*
           * FIXME: Error handling code does not seem to support interlaced
@@ -7829,7 +7704,7 @@ static int decode_frame(AVCodecContext *avctx,
  
          MPV_frame_end(s);
  
-        if (s->first_field) {
+        if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
              /* Wait for second field. */
              *data_size = 0;
  
@@ -7863,18 +7738,14 @@ static int decode_frame(AVCodecContext *avctx,
              if(cur->reference == 0)
                  cur->reference = DELAYED_PIC_REF;
  
-            cross_idr = 0;
-            for(i=0; h->delayed_pic[i]; i++)
-                if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
-                    cross_idr = 1;
-
              out = h->delayed_pic[0];
              out_idx = 0;
-            for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
+            for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
                  if(h->delayed_pic[i]->poc < out->poc){
                      out = h->delayed_pic[i];
                      out_idx = i;
                  }
+            cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i];
  
              out_of_order = !cross_idr && out->poc < h->outputed_poc;
  
@@ -8137,7 +8008,7 @@ AVCodec h264_decoder = {
      NULL,
      decode_end,
      decode_frame,
-    /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
+    /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
      .flush= flush_dpb,
      .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
  };