git.sesse.net Git - ffmpeg/blobdiff - libavcodec/h264.c
matroskadec: simplify matroska_ebmlnum_sint()
[ffmpeg] / libavcodec / h264.c
index 4e5e7e7e0af7a48ecbd0009a2d9f0d160d186377..684d9a730cdc8f54ed4aabfd6fe1d8b99f5df297 100644 (file)
 #define DELAYED_PIC_REF 4
 
 static VLC coeff_token_vlc[4];
+static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2]; /* one packed backing array; decode_init_vlc() gives consecutive slices of it to coeff_token_vlc[0..3] */
+static const int coeff_token_vlc_tables_size[4]={520,332,280,256}; /* entries per slice; their sum must equal the first dimension above (asserted in decode_init_vlc()) */
+
 static VLC chroma_dc_coeff_token_vlc;
+static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
+static const int chroma_dc_coeff_token_vlc_table_size = 256;
 
 static VLC total_zeros_vlc[15];
+static VLC_TYPE total_zeros_vlc_tables[15][512][2];
+static const int total_zeros_vlc_tables_size = 512;
+
 static VLC chroma_dc_total_zeros_vlc[3];
+static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
+static const int chroma_dc_total_zeros_vlc_tables_size = 8;
 
 static VLC run_vlc[6];
+static VLC_TYPE run_vlc_tables[6][8][2];
+static const int run_vlc_tables_size = 8;
+
 static VLC run7_vlc;
+static VLC_TYPE run7_vlc_table[96][2];
+static const int run7_vlc_table_size = 96;
 
 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
+static Picture * remove_long(H264Context *h, int i, int ref_mask);
 
 static av_always_inline uint32_t pack16to32(int a, int b){
 #ifdef WORDS_BIGENDIAN
@@ -78,13 +94,19 @@ const uint8_t ff_div6[52]={
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
 };
 
+static const int left_block_options[4][8]={ /* candidate left_block index sets; fill_caches() points left_block at one row */
+    {0,1,2,3,7,10,8,11}, /* [0] default, assigned before the FRAME_MBAFF handling */
+    {2,2,3,3,8,11,8,11}, /* [1] in the FRAME_MBAFF branch: curr_mb_frame_flag && bottom */
+    {0,0,1,1,7,10,7,10}, /* [2] in the FRAME_MBAFF branch: curr_mb_frame_flag && !bottom */
+    {0,2,0,2,7,10,7,10}  /* [3] in the FRAME_MBAFF branch: !curr_mb_frame_flag */
+};
 
 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
     MpegEncContext * const s = &h->s;
     const int mb_xy= h->mb_xy;
     int topleft_xy, top_xy, topright_xy, left_xy[2];
     int topleft_type, top_type, topright_type, left_type[2];
-    int left_block[8];
+    int * left_block;
     int topleft_partition= -1;
     int i;
 
@@ -100,14 +122,7 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){
     topleft_xy = top_xy - 1;
     topright_xy= top_xy + 1;
     left_xy[1] = left_xy[0] = mb_xy-1;
-    left_block[0]= 0;
-    left_block[1]= 1;
-    left_block[2]= 2;
-    left_block[3]= 3;
-    left_block[4]= 7;
-    left_block[5]= 10;
-    left_block[6]= 8;
-    left_block[7]= 11;
+    left_block = left_block_options[0];
     if(FRAME_MBAFF){
         const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
         const int top_pair_xy      = pair_xy     - s->mb_stride;
@@ -133,7 +148,7 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){
             topleft_xy -= s->mb_stride;
         } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
             topleft_xy += s->mb_stride;
-            // take topleft mv from the middle of the mb, as opposed to all other modes which use the bottom-right partition
+            // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
             topleft_partition = 0;
         }
         if (bottom
@@ -146,34 +161,13 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){
             left_xy[1] = left_xy[0] = pair_xy - 1;
             if (curr_mb_frame_flag) {
                 if (bottom) {
-                    left_block[0]= 2;
-                    left_block[1]= 2;
-                    left_block[2]= 3;
-                    left_block[3]= 3;
-                    left_block[4]= 8;
-                    left_block[5]= 11;
-                    left_block[6]= 8;
-                    left_block[7]= 11;
+                    left_block = left_block_options[1];
                 } else {
-                    left_block[0]= 0;
-                    left_block[1]= 0;
-                    left_block[2]= 1;
-                    left_block[3]= 1;
-                    left_block[4]= 7;
-                    left_block[5]= 10;
-                    left_block[6]= 7;
-                    left_block[7]= 10;
+                    left_block= left_block_options[2];
                 }
             } else {
                 left_xy[1] += s->mb_stride;
-                //left_block[0]= 0;
-                left_block[1]= 2;
-                left_block[2]= 0;
-                left_block[3]= 2;
-                //left_block[4]= 7;
-                left_block[5]= 10;
-                left_block[6]= 7;
-                left_block[7]= 10;
+                left_block = left_block_options[3];
             }
         }
     }
@@ -292,7 +286,7 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){
 4 L . .L . . . .
 5 L . .. . . . .
 */
-//FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
+//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
     if(top_type){
         h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
         h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
@@ -481,7 +475,7 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){
                 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
                 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
 
-                if(h->slice_type == FF_B_TYPE){
+                if(h->slice_type_nos == FF_B_TYPE){
                     fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
 
                     if(IS_DIRECT(top_type)){
@@ -673,7 +667,7 @@ static inline void write_back_non_zero_count(H264Context *h){
 }
 
 /**
- * gets the predicted number of non zero coefficients.
+ * gets the predicted number of non-zero coefficients.
  * @param n block index
  */
 static inline int pred_non_zero_count(H264Context *h, int n){
@@ -728,7 +722,7 @@ static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, in
             if(MB_FIELD
                && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
                && i >= scan8[0]+8){
-                // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
+                // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
                 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
             }
         }
@@ -894,13 +888,14 @@ static inline void pred_pskip_motion(H264Context * const h, int * const mx, int
 }
 
 static inline void direct_dist_scale_factor(H264Context * const h){
-    const int poc = h->s.current_picture_ptr->poc;
+    MpegEncContext * const s = &h->s;
+    const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
     const int poc1 = h->ref_list[1][0].poc;
     int i;
     for(i=0; i<h->ref_count[0]; i++){
         int poc0 = h->ref_list[0][i].poc;
         int td = av_clip(poc1 - poc0, -128, 127);
-        if(td == 0 /* FIXME || pic0 is a long-term ref */){
+        if(td == 0 || h->ref_list[0][i].long_ref){
             h->dist_scale_factor[i] = 256;
         }else{
             int tb = av_clip(poc - poc0, -128, 127);
@@ -920,23 +915,27 @@ static inline void direct_ref_list_init(H264Context * const h){
     Picture * const ref1 = &h->ref_list[1][0];
     Picture * const cur = s->current_picture_ptr;
     int list, i, j;
-    if(cur->pict_type == FF_I_TYPE)
-        cur->ref_count[0] = 0;
-    if(cur->pict_type != FF_B_TYPE)
-        cur->ref_count[1] = 0;
+    int sidx= s->picture_structure&1;
+    int ref1sidx= ref1->reference&1;
     for(list=0; list<2; list++){
-        cur->ref_count[list] = h->ref_count[list];
+        cur->ref_count[sidx][list] = h->ref_count[list];
         for(j=0; j<h->ref_count[list]; j++)
-            cur->ref_poc[list][j] = h->ref_list[list][j].poc;
+            cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
+    }
+    if(s->picture_structure == PICT_FRAME){
+        memcpy(cur->ref_count[0], cur->ref_count[1], sizeof(cur->ref_count[0]));
+        memcpy(cur->ref_poc  [0], cur->ref_poc  [1], sizeof(cur->ref_poc  [0]));
     }
     if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
         return;
     for(list=0; list<2; list++){
-        for(i=0; i<ref1->ref_count[list]; i++){
-            const int poc = ref1->ref_poc[list][i];
+        for(i=0; i<ref1->ref_count[ref1sidx][list]; i++){
+            int poc = ref1->ref_poc[ref1sidx][list][i];
+            if(((poc&3) == 3) != (s->picture_structure == PICT_FRAME))
+                poc= (poc&~3) + s->picture_structure;
             h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
             for(j=0; j<h->ref_count[list]; j++)
-                if(h->ref_list[list][j].poc == poc){
+                if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) == poc){
                     h->map_col_to_list0[list][i] = j;
                     break;
                 }
@@ -944,7 +943,7 @@ static inline void direct_ref_list_init(H264Context * const h){
     }
     if(FRAME_MBAFF){
         for(list=0; list<2; list++){
-            for(i=0; i<ref1->ref_count[list]; i++){
+            for(i=0; i<ref1->ref_count[ref1sidx][list]; i++){
                 j = h->map_col_to_list0[list][i];
                 h->map_col_to_list0_field[list][2*i] = 2*j;
                 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
@@ -955,9 +954,10 @@ static inline void direct_ref_list_init(H264Context * const h){
 
 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
     MpegEncContext * const s = &h->s;
-    const int mb_xy =   h->mb_xy;
-    const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
-    const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
+    const int fieldoff= (s->picture_structure & h->ref_list[1][0].reference) ? 0 : (3-2*s->picture_structure);
+    const int mb_xy =   h->mb_xy + s->mb_stride*fieldoff;
+    const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride + 2*h->b8_stride*fieldoff;
+    const int b4_xy = 4*s->mb_x + 4*s->mb_y*h-> b_stride + 4*h-> b_stride*fieldoff;
     const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
     const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
     const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
@@ -999,13 +999,9 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){
             int refa = h->ref_cache[list][scan8[0] - 1];
             int refb = h->ref_cache[list][scan8[0] - 8];
             int refc = h->ref_cache[list][scan8[0] - 8 + 4];
-            if(refc == -2)
+            if(refc == PART_NOT_AVAILABLE)
                 refc = h->ref_cache[list][scan8[0] - 8 - 1];
-            ref[list] = refa;
-            if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
-                ref[list] = refb;
-            if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
-                ref[list] = refc;
+            ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
             if(ref[list] < 0)
                 ref[list] = -1;
         }
@@ -1156,8 +1152,7 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){
         const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
         const int *dist_scale_factor = h->dist_scale_factor;
 
-        if(FRAME_MBAFF){
-            if(IS_INTERLACED(*mb_type)){
+            if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
                 map_col_to_list0[0] = h->map_col_to_list0_field[0];
                 map_col_to_list0[1] = h->map_col_to_list0_field[1];
                 dist_scale_factor = h->dist_scale_factor_field;
@@ -1166,7 +1161,10 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){
                 /* FIXME assumes direct_8x8_inference == 1 */
                 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
                 int mb_types_col[2];
+                int b8_stride = h->b8_stride;
+                int b4_stride = h->b_stride;
                 int y_shift;
+                int ref_shift;
 
                 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
                          | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
@@ -1184,6 +1182,7 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){
                         l1mv1 -= 4*h->b_stride;
                     }
                     y_shift = 0;
+                    ref_shift= FRAME_MBAFF ? 0 : 1;
 
                     if(   (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
                        && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
@@ -1191,24 +1190,28 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){
                         *mb_type |= MB_TYPE_16x8;
                     else
                         *mb_type |= MB_TYPE_8x8;
+                    b8_stride *= 3;
+                    b4_stride *= 6;
                 }else{
-                    /* field to frame scaling */
-                    /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
-                     * but in MBAFF, top and bottom POC are equal */
-                    int dy = (s->mb_y&1) ? 1 : 2;
+                    int cur_poc = s->current_picture_ptr->poc;
+                    int *col_poc = h->ref_list[1]->field_poc;
+                    int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
+                    int dy = 2*col_parity - (s->mb_y&1);
                     mb_types_col[0] =
-                    mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
+                    mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy + col_parity*s->mb_stride];
                     l1ref0 += dy*h->b8_stride;
                     l1ref1 += dy*h->b8_stride;
                     l1mv0 += 2*dy*h->b_stride;
                     l1mv1 += 2*dy*h->b_stride;
                     y_shift = 2;
+                    ref_shift= FRAME_MBAFF ? 2 : 1;
 
                     if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
                        && !is_b8x8)
                         *mb_type |= MB_TYPE_16x16;
                     else
                         *mb_type |= MB_TYPE_8x8;
+                    b8_stride = 0;
                 }
 
                 for(i8=0; i8<4; i8++){
@@ -1229,18 +1232,18 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){
                         continue;
                     }
 
-                    ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
+                    ref0 = l1ref0[x8 + y8*b8_stride];
                     if(ref0 >= 0)
-                        ref0 = map_col_to_list0[0][ref0*2>>y_shift];
+                        ref0 = map_col_to_list0[0][ref0*2>>ref_shift];
                     else{
-                        ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
+                        ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride]*2>>ref_shift];
                         l1mv= l1mv1;
                     }
                     scale = dist_scale_factor[ref0];
                     fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
 
                     {
-                        const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
+                        const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
                         int my_col = (mv_col[1]<<y_shift)/2;
                         int mx = (scale * mv_col[0] + 128) >> 8;
                         int my = (scale * my_col + 128) >> 8;
@@ -1250,7 +1253,6 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){
                 }
                 return;
             }
-        }
 
         /* one-to-one mv scaling */
 
@@ -1360,7 +1362,7 @@ static inline void write_back_motion(H264Context *h, int mb_type){
         }
     }
 
-    if(h->slice_type == FF_B_TYPE && h->pps.cabac){
+    if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
         if(IS_8X8(mb_type)){
             uint8_t *direct_table = &h->direct_table[b8_xy];
             direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
@@ -1458,7 +1460,7 @@ static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
 }
 
 /**
- * idct tranforms the 16 dc values and dequantize them.
+ * IDCT transforms the 16 dc values and dequantizes them.
  * @param qp quantization parameter
  */
 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
@@ -1490,7 +1492,7 @@ static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
         const int z2= temp[4*1+i] - temp[4*3+i];
         const int z3= temp[4*1+i] + temp[4*3+i];
 
-        block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
+        block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
         block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
         block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
         block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
@@ -1499,7 +1501,7 @@ static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
 
 #if 0
 /**
- * dct tranforms the 16 dc values.
+ * DCT transforms the 16 dc values.
  * @param qp quantization parameter ??? FIXME
  */
 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
@@ -1588,7 +1590,7 @@ static void chroma_dc_dct_c(DCTELEM *block){
  * gets the chroma qp.
  */
 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
-    return h->pps.chroma_qp_table[t][qscale & 0xff];
+    return h->pps.chroma_qp_table[t][qscale];
 }
 
 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
@@ -1688,7 +1690,7 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
     const int pic_width  = 16*s->mb_width;
     const int pic_height = 16*s->mb_height >> MB_FIELD;
 
-    if(!pic->data[0]) //FIXME this is unacceptable, some senseable error concealment must be done for missing reference frames
+    if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
         return;
 
     if(mx&7) extra_width -= 3;
@@ -1958,37 +1960,67 @@ static av_cold void decode_init_vlc(void){
 
     if (!done) {
         int i;
+        int offset;
         done = 1;
 
+        chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
+        chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
         init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
                  &chroma_dc_coeff_token_len [0], 1, 1,
-                 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
+                 &chroma_dc_coeff_token_bits[0], 1, 1,
+                 INIT_VLC_USE_NEW_STATIC);
 
+        offset = 0;
         for(i=0; i<4; i++){
+            coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
+            coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
             init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
                      &coeff_token_len [i][0], 1, 1,
-                     &coeff_token_bits[i][0], 1, 1, 1);
+                     &coeff_token_bits[i][0], 1, 1,
+                     INIT_VLC_USE_NEW_STATIC);
+            offset += coeff_token_vlc_tables_size[i];
         }
+        /*
+         * This is a one time safety check to make sure that
+         * the packed static coeff_token_vlc table sizes
+         * were initialized correctly.
+         */
+        assert(offset == sizeof(coeff_token_vlc_tables)/(sizeof(VLC_TYPE)*2));
 
         for(i=0; i<3; i++){
-            init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
+            chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
+            chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
+            init_vlc(&chroma_dc_total_zeros_vlc[i],
+                     CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
                      &chroma_dc_total_zeros_len [i][0], 1, 1,
-                     &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
+                     &chroma_dc_total_zeros_bits[i][0], 1, 1,
+                     INIT_VLC_USE_NEW_STATIC);
         }
         for(i=0; i<15; i++){
-            init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
+            total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
+            total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
+            init_vlc(&total_zeros_vlc[i],
+                     TOTAL_ZEROS_VLC_BITS, 16,
                      &total_zeros_len [i][0], 1, 1,
-                     &total_zeros_bits[i][0], 1, 1, 1);
+                     &total_zeros_bits[i][0], 1, 1,
+                     INIT_VLC_USE_NEW_STATIC);
         }
 
         for(i=0; i<6; i++){
-            init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
+            run_vlc[i].table = run_vlc_tables[i];
+            run_vlc[i].table_allocated = run_vlc_tables_size;
+            init_vlc(&run_vlc[i],
+                     RUN_VLC_BITS, 7,
                      &run_len [i][0], 1, 1,
-                     &run_bits[i][0], 1, 1, 1);
+                     &run_bits[i][0], 1, 1,
+                     INIT_VLC_USE_NEW_STATIC);
         }
+        run7_vlc.table = run7_vlc_table,
+        run7_vlc.table_allocated = run7_vlc_table_size;
         init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
                  &run_len [6][0], 1, 1,
-                 &run_bits[6][0], 1, 1, 1);
+                 &run_bits[6][0], 1, 1,
+                 INIT_VLC_USE_NEW_STATIC);
     }
 }
 
@@ -2200,7 +2232,11 @@ static av_cold int decode_init(AVCodecContext *avctx){
 //    s->decode_mb= ff_h263_decode_mb;
     s->quarter_sample = 1;
     s->low_delay= 1;
-    avctx->pix_fmt= PIX_FMT_YUV420P;
+
+    if(avctx->codec_id == CODEC_ID_SVQ3)
+        avctx->pix_fmt= PIX_FMT_YUVJ420P;
+    else
+        avctx->pix_fmt= PIX_FMT_YUV420P;
 
     decode_init_vlc();
 
@@ -2213,6 +2249,7 @@ static av_cold int decode_init(AVCodecContext *avctx){
     }
 
     h->thread_context[0] = h;
+    h->outputed_poc = INT_MIN;
     return 0;
 }
 
@@ -2226,7 +2263,7 @@ static int frame_start(H264Context *h){
     /*
      * MPV_frame_start uses pict_type to derive key_frame.
      * This is incorrect for H.264; IDR markings must be used.
-     * Zero here; IDR markings per slice in frame or fields are OR'd in later.
+     * Zero here; IDR markings per slice in frame or fields are ORed in later.
      * See decode_nal_units().
      */
     s->current_picture_ptr->key_frame= 0;
@@ -2255,6 +2292,20 @@ static int frame_start(H264Context *h){
         memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
 
 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
+
+    // We mark the current picture as non-reference after allocating it, so
+    // that if we break out due to an error it can be released automatically
+    // in the next MPV_frame_start().
+    // SVQ3, like most other codecs, has only last/next/current pictures, which
+    // get released even with reference set; besides, SVQ3 and those codecs do
+    // not mark frames as reference later "naturally".
+    if(s->codec_id != CODEC_ID_SVQ3)
+        s->current_picture_ptr->reference= 0;
+
+    s->current_picture_ptr->field_poc[0]=
+    s->current_picture_ptr->field_poc[1]= INT_MAX;
+    assert(s->current_picture_ptr->long_ref==0);
+
     return 0;
 }
 
@@ -2458,7 +2509,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
         linesize   = h->mb_linesize   = s->linesize * 2;
         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
         block_offset = &h->block_offset[24];
-        if(mb_y&1){ //FIXME move out of this func?
+        if(mb_y&1){ //FIXME move out of this function?
             dest_y -= s->linesize*15;
             dest_cb-= s->uvlinesize*7;
             dest_cr-= s->uvlinesize*7;
@@ -2508,30 +2559,12 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
     }
 
     if (!simple && IS_INTRA_PCM(mb_type)) {
-        unsigned int x, y;
-
-        // The pixels are stored in h->mb array in the same order as levels,
-        // copy them in output in the correct order.
-        for(i=0; i<16; i++) {
-            for (y=0; y<4; y++) {
-                for (x=0; x<4; x++) {
-                    *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
-                }
-            }
-        }
-        for(i=16; i<16+4; i++) {
-            for (y=0; y<4; y++) {
-                for (x=0; x<4; x++) {
-                    *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
-                }
-            }
+        for (i=0; i<16; i++) {
+            memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
         }
-        for(i=20; i<20+4; i++) {
-            for (y=0; y<4; y++) {
-                for (x=0; x<4; x++) {
-                    *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
-                }
-            }
+        for (i=0; i<8; i++) {
+            memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4,  8);
+            memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4,  8);
         }
     } else {
         if(IS_INTRA(mb_type)){
@@ -2703,6 +2736,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
             tprintf(h->s.avctx, "call filter_mb\n");
             backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
             fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
+            h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
+            h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
             filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
         }
     }
@@ -2726,7 +2761,8 @@ static void hl_decode_mb(H264Context *h){
     MpegEncContext * const s = &h->s;
     const int mb_xy= h->mb_xy;
     const int mb_type= s->current_picture.mb_type[mb_xy];
-    int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 || (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
+    int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
+                    (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
 
     if(ENABLE_H264_ENCODER && !s->decode)
         return;
@@ -2744,6 +2780,7 @@ static void pic_as_field(Picture *pic, const int parity){
         pic->reference = parity;
         pic->linesize[i] *= 2;
     }
+    pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
 }
 
 static int split_field_copy(Picture *dest, Picture *src,
@@ -2752,87 +2789,57 @@ static int split_field_copy(Picture *dest, Picture *src,
 
     if (match) {
         *dest = *src;
-        pic_as_field(dest, parity);
-        dest->pic_id *= 2;
-        dest->pic_id += id_add;
+        if(parity != PICT_FRAME){
+            pic_as_field(dest, parity);
+            dest->pic_id *= 2;
+            dest->pic_id += id_add;
+        }
     }
 
     return match;
 }
 
-/**
- * Split one reference list into field parts, interleaving by parity
- * as per H.264 spec section 8.2.4.2.5. Output fields have their data pointers
- * set to look at the actual start of data for that field.
- *
- * @param dest output list
- * @param dest_len maximum number of fields to put in dest
- * @param src the source reference list containing fields and/or field pairs
- *            (aka short_ref/long_ref, or
- *             refFrameListXShortTerm/refFrameListLongTerm in spec-speak)
- * @param src_len number of Picture's in source (pairs and unmatched fields)
- * @param parity the parity of the picture being decoded/needing
- *        these ref pics (PICT_{TOP,BOTTOM}_FIELD)
- * @return number of fields placed in dest
- */
-static int split_field_half_ref_list(Picture *dest, int dest_len,
-                                     Picture *src,  int src_len,  int parity){
-    int same_parity   = 1;
-    int same_i        = 0;
-    int opp_i         = 0;
-    int out_i;
-    int field_output;
-
-    for (out_i = 0; out_i < dest_len; out_i += field_output) {
-        if (same_parity && same_i < src_len) {
-            field_output = split_field_copy(dest + out_i, src + same_i,
-                                            parity, 1);
-            same_parity = !field_output;
-            same_i++;
-
-        } else if (opp_i < src_len) {
-            field_output = split_field_copy(dest + out_i, src + opp_i,
-                                            PICT_FRAME - parity, 0);
-            same_parity = field_output;
-            opp_i++;
+static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
+    int i[2]={0};
+    int index=0;
 
-        } else {
-            break;
+    while(i[0]<len || i[1]<len){
+        while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
+            i[0]++;
+        while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
+            i[1]++;
+        if(i[0] < len){
+            in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
+            split_field_copy(&def[index++], in[ i[0]++ ], sel  , 1);
+        }
+        if(i[1] < len){
+            in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
+            split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
         }
     }
 
-    return out_i;
+    return index;
 }
 
-/**
- * Split the reference frame list into a reference field list.
- * This implements H.264 spec 8.2.4.2.5 for a combined input list.
- * The input list contains both reference field pairs and
- * unmatched reference fields; it is ordered as spec describes
- * RefPicListX for frames in 8.2.4.2.1 and 8.2.4.2.3, except that
- * unmatched field pairs are also present. Conceptually this is equivalent
- * to concatenation of refFrameListXShortTerm with refFrameListLongTerm.
- *
- * @param dest output reference list where ordered fields are to be placed
- * @param dest_len max number of fields to place at dest
- * @param src source reference list, as described above
- * @param src_len number of pictures (pairs and unmatched fields) in src
- * @param parity parity of field being currently decoded
- *        (one of PICT_{TOP,BOTTOM}_FIELD)
- * @param long_i index into src array that holds first long reference picture,
- *        or src_len if no long refs present.
- */
-static int split_field_ref_list(Picture *dest, int dest_len,
-                                Picture *src,  int src_len,
-                                int parity,    int long_i){
+static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){ /* repeatedly picks from src[0..len) the entry whose poc is nearest beyond limit (ascending for dir==0, descending for dir==1), appends it to sorted[], and returns the number appended */
+    int i, best_poc;
+    int out_i= 0;
 
-    int i = split_field_half_ref_list(dest, dest_len, src, long_i, parity);
-    dest += i;
-    dest_len -= i;
+    for(;;){
+        best_poc= dir ? INT_MIN : INT_MAX; /* sentinel: still unchanged after the scan means no candidate was found */
 
-    i += split_field_half_ref_list(dest, dest_len, src + long_i,
-                                   src_len - long_i, parity);
-    return i;
+        for(i=0; i<len; i++){
+            const int poc= src[i]->poc;
+            if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){ /* dir==0: smallest poc > limit; dir==1: largest poc <= limit (XOR with dir inverts both tests; assumes dir is 0 or 1 - confirm against callers) */
+                best_poc= poc;
+                sorted[out_i]= src[i];
+            }
+        }
+        if(best_poc == (dir ? INT_MIN : INT_MAX))
+            break;
+        limit= sorted[out_i++]->poc - dir; /* move the bound so the next pass selects strictly beyond the entry just emitted */
+    }
+    return out_i;
 }
 
 /**
@@ -2840,146 +2847,48 @@ static int split_field_ref_list(Picture *dest, int dest_len,
  */
 static int fill_default_ref_list(H264Context *h){
     MpegEncContext * const s = &h->s;
-    int i;
-    int smallest_poc_greater_than_current = -1;
-    int structure_sel;
-    Picture sorted_short_ref[32];
-    Picture field_entry_list[2][32];
-    Picture *frame_list[2];
-
-    if (FIELD_PICTURE) {
-        structure_sel = PICT_FRAME;
-        frame_list[0] = field_entry_list[0];
-        frame_list[1] = field_entry_list[1];
-    } else {
-        structure_sel = 0;
-        frame_list[0] = h->default_ref_list[0];
-        frame_list[1] = h->default_ref_list[1];
-    }
+    int i, len;
 
-    if(h->slice_type==FF_B_TYPE){
-        int list;
-        int len[2];
-        int short_len[2];
-        int out_i;
-        int limit= INT_MIN;
-
-        /* sort frame according to poc in B slice */
-        for(out_i=0; out_i<h->short_ref_count; out_i++){
-            int best_i=INT_MIN;
-            int best_poc=INT_MAX;
-
-            for(i=0; i<h->short_ref_count; i++){
-                const int poc= h->short_ref[i]->poc;
-                if(poc > limit && poc < best_poc){
-                    best_poc= poc;
-                    best_i= i;
-                }
-            }
+    if(h->slice_type_nos==FF_B_TYPE){
+        Picture *sorted[32];
+        int cur_poc, list;
+        int lens[2];
 
-            assert(best_i != INT_MIN);
-
-            limit= best_poc;
-            sorted_short_ref[out_i]= *h->short_ref[best_i];
-            tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
-            if (-1 == smallest_poc_greater_than_current) {
-                if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
-                    smallest_poc_greater_than_current = out_i;
-                }
-            }
-        }
-
-        tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
-
-        // find the largest poc
-        for(list=0; list<2; list++){
-            int index = 0;
-            int j= -99;
-            int step= list ? -1 : 1;
-
-            for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
-                int sel;
-                while(j<0 || j>= h->short_ref_count){
-                    if(j != -99 && step == (list ? -1 : 1))
-                        return -1;
-                    step = -step;
-                    j= smallest_poc_greater_than_current + (step>>1);
-                }
-                sel = sorted_short_ref[j].reference | structure_sel;
-                if(sel != PICT_FRAME) continue;
-                frame_list[list][index  ]= sorted_short_ref[j];
-                frame_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
-            }
-            short_len[list] = index;
+        if(FIELD_PICTURE)
+            cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
+        else
+            cur_poc= s->current_picture_ptr->poc;
 
-            for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
-                int sel;
-                if(h->long_ref[i] == NULL) continue;
-                sel = h->long_ref[i]->reference | structure_sel;
-                if(sel != PICT_FRAME) continue;
+        for(list= 0; list<2; list++){
+            len= add_sorted(sorted    , h->short_ref, h->short_ref_count, cur_poc, 1^list);
+            len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
+            assert(len<=32);
+            len= build_def_list(h->default_ref_list[list]    , sorted     , len, 0, s->picture_structure);
+            len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
+            assert(len<=32);
 
-                frame_list[ list ][index  ]= *h->long_ref[i];
-                frame_list[ list ][index++].pic_id= i;
-            }
-            len[list] = index;
+            if(len < h->ref_count[list])
+                memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
+            lens[list]= len;
         }
 
-        for(list=0; list<2; list++){
-            if (FIELD_PICTURE)
-                len[list] = split_field_ref_list(h->default_ref_list[list],
-                                                 h->ref_count[list],
-                                                 frame_list[list],
-                                                 len[list],
-                                                 s->picture_structure,
-                                                 short_len[list]);
-
-            // swap the two first elements of L1 when L0 and L1 are identical
-            if(list && len[0] > 1 && len[0] == len[1])
-                for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0]; i++)
-                    if(i == len[0]){
-                        FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
-                        break;
-                    }
-
-            if(len[list] < h->ref_count[ list ])
-                memset(&h->default_ref_list[list][len[list]], 0, sizeof(Picture)*(h->ref_count[ list ] - len[list]));
+        if(lens[0] == lens[1] && lens[1] > 1){
+            for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
+            if(i == lens[0])
+                FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
         }
-
-
     }else{
-        int index=0;
-        int short_len;
-        for(i=0; i<h->short_ref_count; i++){
-            int sel;
-            sel = h->short_ref[i]->reference | structure_sel;
-            if(sel != PICT_FRAME) continue;
-            frame_list[0][index  ]= *h->short_ref[i];
-            frame_list[0][index++].pic_id= h->short_ref[i]->frame_num;
-        }
-        short_len = index;
-        for(i = 0; i < 16; i++){
-            int sel;
-            if(h->long_ref[i] == NULL) continue;
-            sel = h->long_ref[i]->reference | structure_sel;
-            if(sel != PICT_FRAME) continue;
-            frame_list[0][index  ]= *h->long_ref[i];
-            frame_list[0][index++].pic_id= i;
-        }
-
-        if (FIELD_PICTURE)
-            index = split_field_ref_list(h->default_ref_list[0],
-                                         h->ref_count[0], frame_list[0],
-                                         index, s->picture_structure,
-                                         short_len);
-
-        if(index < h->ref_count[0])
-            memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
+        len = build_def_list(h->default_ref_list[0]    , h->short_ref, h->short_ref_count, 0, s->picture_structure);
+        len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16                , 1, s->picture_structure);
+        assert(len <= 32);
+        if(len < h->ref_count[0])
+            memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
     }
 #ifdef TRACE
     for (i=0; i<h->ref_count[0]; i++) {
         tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
     }
-    if(h->slice_type==FF_B_TYPE){
+    if(h->slice_type_nos==FF_B_TYPE){
         for (i=0; i<h->ref_count[1]; i++) {
             tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
         }
@@ -3021,7 +2930,6 @@ static int decode_ref_pic_list_reordering(H264Context *h){
 
     print_short_term(h);
     print_long_term(h);
-    if(h->slice_type==FF_I_TYPE || h->slice_type==FF_SI_TYPE) return 0; //FIXME move before func
 
     for(list=0; list<h->list_count; list++){
         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
@@ -3063,10 +2971,10 @@ static int decode_ref_pic_list_reordering(H264Context *h){
                             ref = h->short_ref[i];
                             assert(ref->reference);
                             assert(!ref->long_ref);
-                            if(ref->data[0] != NULL &&
+                            if(
                                    ref->frame_num == frame_num &&
-                                   (ref->reference & pic_structure) &&
-                                   ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
+                                   (ref->reference & pic_structure)
+                              )
                                 break;
                         }
                         if(i>=0)
@@ -3117,12 +3025,14 @@ static int decode_ref_pic_list_reordering(H264Context *h){
     }
     for(list=0; list<h->list_count; list++){
         for(index= 0; index < h->ref_count[list]; index++){
-            if(!h->ref_list[list][index].data[0])
-                h->ref_list[list][index]= s->current_picture;
+            if(!h->ref_list[list][index].data[0]){
+                av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
+                h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
+            }
         }
     }
 
-    if(h->slice_type==FF_B_TYPE && !h->direct_spatial_mv_pred)
+    if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
         direct_dist_scale_factor(h);
     direct_ref_list_init(h);
     return 0;
@@ -3187,25 +3097,27 @@ static int pred_weight_table(H264Context *h){
                 h->luma_offset[list][i]= 0;
             }
 
-            chroma_weight_flag= get_bits1(&s->gb);
-            if(chroma_weight_flag){
-                int j;
-                for(j=0; j<2; j++){
-                    h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
-                    h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
-                    if(   h->chroma_weight[list][i][j] != chroma_def
-                       || h->chroma_offset[list][i][j] != 0)
-                        h->use_weight_chroma= 1;
-                }
-            }else{
-                int j;
-                for(j=0; j<2; j++){
-                    h->chroma_weight[list][i][j]= chroma_def;
-                    h->chroma_offset[list][i][j]= 0;
+            if(CHROMA){
+                chroma_weight_flag= get_bits1(&s->gb);
+                if(chroma_weight_flag){
+                    int j;
+                    for(j=0; j<2; j++){
+                        h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
+                        h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
+                        if(   h->chroma_weight[list][i][j] != chroma_def
+                        || h->chroma_offset[list][i][j] != 0)
+                            h->use_weight_chroma= 1;
+                    }
+                }else{
+                    int j;
+                    for(j=0; j<2; j++){
+                        h->chroma_weight[list][i][j]= chroma_def;
+                        h->chroma_offset[list][i][j]= 0;
+                    }
                 }
             }
         }
-        if(h->slice_type != FF_B_TYPE) break;
+        if(h->slice_type_nos != FF_B_TYPE) break;
     }
     h->use_weight= h->use_weight || h->use_weight_chroma;
     return 0;
@@ -3263,15 +3175,11 @@ static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
     if (pic->reference &= refmask) {
         return 0;
     } else {
-        if(pic == h->delayed_output_pic)
-            pic->reference=DELAYED_PIC_REF;
-        else{
-            for(i = 0; h->delayed_pic[i]; i++)
-                if(pic == h->delayed_pic[i]){
-                    pic->reference=DELAYED_PIC_REF;
-                    break;
-                }
-        }
+        for(i = 0; h->delayed_pic[i]; i++)
+            if(pic == h->delayed_pic[i]){
+                pic->reference=DELAYED_PIC_REF;
+                break;
+            }
         return 1;
     }
 }
@@ -3283,32 +3191,31 @@ static void idr(H264Context *h){
     int i;
 
     for(i=0; i<16; i++){
-        if (h->long_ref[i] != NULL) {
-            unreference_pic(h, h->long_ref[i], 0);
-            h->long_ref[i]= NULL;
-        }
+        remove_long(h, i, 0);
     }
-    h->long_ref_count=0;
+    assert(h->long_ref_count==0);
 
     for(i=0; i<h->short_ref_count; i++){
         unreference_pic(h, h->short_ref[i], 0);
         h->short_ref[i]= NULL;
     }
     h->short_ref_count=0;
+    h->prev_frame_num= 0;
+    h->prev_frame_num_offset= 0;
+    h->prev_poc_msb=
+    h->prev_poc_lsb= 0;
 }
 
 /* forget old pics after a seek */
 static void flush_dpb(AVCodecContext *avctx){
     H264Context *h= avctx->priv_data;
     int i;
-    for(i=0; i<16; i++) {
+    for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
         if(h->delayed_pic[i])
             h->delayed_pic[i]->reference= 0;
         h->delayed_pic[i]= NULL;
     }
-    if(h->delayed_output_pic)
-        h->delayed_output_pic->reference= 0;
-    h->delayed_output_pic= NULL;
+    h->outputed_poc= INT_MIN;
     idr(h);
     if(h->s.current_picture_ptr)
         h->s.current_picture_ptr->reference= 0;
@@ -3347,7 +3254,7 @@ static Picture * find_short(H264Context *h, int frame_num, int *idx){
  * @param i index into h->short_ref of picture to remove.
  */
 static void remove_short_at_index(H264Context *h, int i){
-    assert(i > 0 && i < h->short_ref_count);
+    assert(i >= 0 && i < h->short_ref_count);
     h->short_ref[i]= NULL;
     if (--h->short_ref_count)
         memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
@@ -3357,7 +3264,7 @@ static void remove_short_at_index(H264Context *h, int i){
  *
  * @return the removed picture or NULL if an error occurs
  */
-static Picture * remove_short(H264Context *h, int frame_num){
+static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
     MpegEncContext * const s = &h->s;
     Picture *pic;
     int i;
@@ -3366,33 +3273,31 @@ static Picture * remove_short(H264Context *h, int frame_num){
         av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
 
     pic = find_short(h, frame_num, &i);
-    if (pic)
+    if (pic){
+        if(unreference_pic(h, pic, ref_mask))
         remove_short_at_index(h, i);
+    }
 
     return pic;
 }
 
 /**
  * Remove a picture from the long term reference list by its index in
- * that list.  This does no checking on the provided index; it is assumed
- * to be valid. The removed entry is set to NULL. Other entries are unaffected.
- * @param i index into h->long_ref of picture to remove.
- */
-static void remove_long_at_index(H264Context *h, int i){
-    h->long_ref[i]= NULL;
-    h->long_ref_count--;
-}
-
-/**
- *
+ * that list.
  * @return the removed picture or NULL if an error occurs
  */
-static Picture * remove_long(H264Context *h, int i){
+static Picture * remove_long(H264Context *h, int i, int ref_mask){ // returns h->long_ref[i] as found on entry (may be NULL), whether or not the slot ends up cleared
     Picture *pic;
 
     pic= h->long_ref[i];
-    if (pic)
-        remove_long_at_index(h, i);
+    if (pic){
+        if(unreference_pic(h, pic, ref_mask)){ // nonzero only when no reference bits of pic survive ref_mask
+            assert(h->long_ref[i]->long_ref == 1);
+            h->long_ref[i]->long_ref= 0; // picture is no longer flagged as long term
+            h->long_ref[i]= NULL;
+            h->long_ref_count--;
+        }
+    }
 
     return pic;
 }
@@ -3440,73 +3345,58 @@ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
         av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
 
     for(i=0; i<mmco_count; i++){
-        int structure, frame_num, unref_pic;
+        int structure, frame_num;
         if(s->avctx->debug&FF_DEBUG_MMCO)
             av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
 
+        if(   mmco[i].opcode == MMCO_SHORT2UNUSED
+           || mmco[i].opcode == MMCO_SHORT2LONG){
+            frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
+            pic = find_short(h, frame_num, &j);
+            if(!pic){
+                if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
+                   || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
+                av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
+                continue;
+            }
+        }
+
         switch(mmco[i].opcode){
         case MMCO_SHORT2UNUSED:
             if(s->avctx->debug&FF_DEBUG_MMCO)
                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
-            frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
-            pic = find_short(h, frame_num, &j);
-            if (pic) {
-                if (unreference_pic(h, pic, structure ^ PICT_FRAME))
-                    remove_short_at_index(h, j);
-            } else if(s->avctx->debug&FF_DEBUG_MMCO)
-                av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short failure\n");
+            remove_short(h, frame_num, structure ^ PICT_FRAME);
             break;
         case MMCO_SHORT2LONG:
-            if (FIELD_PICTURE && mmco[i].long_arg < h->long_ref_count &&
-                    h->long_ref[mmco[i].long_arg]->frame_num ==
-                                              mmco[i].short_pic_num / 2) {
-                /* do nothing, we've already moved this field pair. */
-            } else {
-                int frame_num = mmco[i].short_pic_num >> FIELD_PICTURE;
+                if (h->long_ref[mmco[i].long_arg] != pic)
+                    remove_long(h, mmco[i].long_arg, 0);
 
-                pic= remove_long(h, mmco[i].long_arg);
-                if(pic) unreference_pic(h, pic, 0);
-
-                h->long_ref[ mmco[i].long_arg ]= remove_short(h, frame_num);
+                remove_short_at_index(h, j);
+                h->long_ref[ mmco[i].long_arg ]= pic;
                 if (h->long_ref[ mmco[i].long_arg ]){
                     h->long_ref[ mmco[i].long_arg ]->long_ref=1;
                     h->long_ref_count++;
                 }
-            }
             break;
         case MMCO_LONG2UNUSED:
             j = pic_num_extract(h, mmco[i].long_arg, &structure);
             pic = h->long_ref[j];
             if (pic) {
-                if (unreference_pic(h, pic, structure ^ PICT_FRAME))
-                    remove_long_at_index(h, j);
+                remove_long(h, j, structure ^ PICT_FRAME);
             } else if(s->avctx->debug&FF_DEBUG_MMCO)
                 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
             break;
         case MMCO_LONG:
-            unref_pic = 1;
-            if (FIELD_PICTURE && !s->first_field) {
-                if (h->long_ref[mmco[i].long_arg] == s->current_picture_ptr) {
-                    /* Just mark second field as referenced */
-                    unref_pic = 0;
-                } else if (s->current_picture_ptr->reference) {
+                    // Comment below left from previous code as it is an interesting note.
                     /* First field in pair is in short term list or
                      * at a different long term index.
-                     * This is not allowed; see 7.4.3, notes 2 and 3.
+                     * This is not allowed; see 7.4.3.3, notes 2 and 3.
                      * Report the problem and keep the pair where it is,
                      * and mark this field valid.
                      */
-                    av_log(h->s.avctx, AV_LOG_ERROR,
-                        "illegal long term reference assignment for second "
-                        "field in complementary field pair (first field is "
-                        "short term or has non-matching long index)\n");
-                    unref_pic = 0;
-                }
-            }
 
-            if (unref_pic) {
-                pic= remove_long(h, mmco[i].long_arg);
-                if(pic) unreference_pic(h, pic, 0);
+            if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
+                remove_long(h, mmco[i].long_arg, 0);
 
                 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
                 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
@@ -3520,32 +3410,34 @@ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
             assert(mmco[i].long_arg <= 16);
             // just remove the long term which index is greater than new max
             for(j = mmco[i].long_arg; j<16; j++){
-                pic = remove_long(h, j);
-                if (pic) unreference_pic(h, pic, 0);
+                remove_long(h, j, 0);
             }
             break;
         case MMCO_RESET:
             while(h->short_ref_count){
-                pic= remove_short(h, h->short_ref[0]->frame_num);
-                if(pic) unreference_pic(h, pic, 0);
+                remove_short(h, h->short_ref[0]->frame_num, 0);
             }
             for(j = 0; j < 16; j++) {
-                pic= remove_long(h, j);
-                if(pic) unreference_pic(h, pic, 0);
-            }
+                remove_long(h, j, 0);
+            }
+            s->current_picture_ptr->poc=
+            s->current_picture_ptr->field_poc[0]=
+            s->current_picture_ptr->field_poc[1]=
+            h->poc_lsb=
+            h->poc_msb=
+            h->frame_num=
+            s->current_picture_ptr->frame_num= 0;
             break;
         default: assert(0);
         }
     }
 
-    if (!current_ref_assigned && FIELD_PICTURE &&
-            !s->first_field && s->current_picture_ptr->reference) {
-
+    if (!current_ref_assigned) {
         /* Second field of complementary field pair; the first field of
          * which is already referenced. If short referenced, it
          * should be first entry in short_ref. If not, it must exist
          * in long_ref; trying to put it on the short list here is an
-         * error in the encoded bit stream (ref: 7.4.3, NOTE 2 and 3).
+         * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
          */
         if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
             /* Just mark the second field valid */
@@ -3556,30 +3448,18 @@ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
                                              "in complementary field pair "
                                              "(first field is long term)\n");
         } else {
-            /*
-             * First field in reference, but not in any sensible place on our
-             * reference lists. This shouldn't happen unless reference
-             * handling somewhere else is wrong.
-             */
-            assert(0);
-        }
-        current_ref_assigned = 1;
-    }
-
-    if(!current_ref_assigned){
-        pic= remove_short(h, s->current_picture_ptr->frame_num);
-        if(pic){
-            unreference_pic(h, pic, 0);
-            av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
-        }
+            pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
+            if(pic){
+                av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
+            }
 
-        if(h->short_ref_count)
-            memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
+            if(h->short_ref_count)
+                memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
 
-        h->short_ref[0]= s->current_picture_ptr;
-        h->short_ref[0]->long_ref=0;
-        h->short_ref_count++;
-        s->current_picture_ptr->reference |= s->picture_structure;
+            h->short_ref[0]= s->current_picture_ptr;
+            h->short_ref_count++;
+            s->current_picture_ptr->reference |= s->picture_structure;
+        }
     }
 
     if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
@@ -3592,19 +3472,17 @@ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
                "number of reference frames exceeds max (probably "
                "corrupt input), discarding one\n");
 
-        if (h->long_ref_count) {
+        if (h->long_ref_count && !h->short_ref_count) {
             for (i = 0; i < 16; ++i)
                 if (h->long_ref[i])
                     break;
 
             assert(i < 16);
-            pic = h->long_ref[i];
-            remove_long_at_index(h, i);
+            remove_long(h, i, 0);
         } else {
             pic = h->short_ref[h->short_ref_count - 1];
-            remove_short_at_index(h, h->short_ref_count - 1);
+            remove_short(h, pic->frame_num, 0);
         }
-        unreference_pic(h, pic, 0);
     }
 
     print_short_term(h);
@@ -3616,13 +3494,12 @@ static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
     MpegEncContext * const s = &h->s;
     int i;
 
+    h->mmco_index= 0;
     if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
         s->broken_link= get_bits1(gb) -1;
-        h->mmco[0].long_arg= get_bits1(gb) - 1; // current_long_term_idx
-        if(h->mmco[0].long_arg == -1)
-            h->mmco_index= 0;
-        else{
+        if(get_bits1(gb)){
             h->mmco[0].opcode= MMCO_LONG;
+            h->mmco[0].long_arg= 0;
             h->mmco_index= 1;
         }
     }else{
@@ -3669,8 +3546,7 @@ static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
                     h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
                     h->mmco_index= 2;
                 }
-            }else
-                h->mmco_index= 0;
+            }
         }
     }
 
@@ -3681,24 +3557,15 @@ static int init_poc(H264Context *h){
     MpegEncContext * const s = &h->s;
     const int max_frame_num= 1<<h->sps.log2_max_frame_num;
     int field_poc[2];
+    Picture *cur = s->current_picture_ptr;
 
-    if(h->nal_unit_type == NAL_IDR_SLICE){
-        h->frame_num_offset= 0;
-    }else{
-        if(h->frame_num < h->prev_frame_num)
-            h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
-        else
-            h->frame_num_offset= h->prev_frame_num_offset;
-    }
+    h->frame_num_offset= h->prev_frame_num_offset;
+    if(h->frame_num < h->prev_frame_num)
+        h->frame_num_offset += max_frame_num;
 
     if(h->sps.poc_type==0){
         const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
 
-        if(h->nal_unit_type == NAL_IDR_SLICE){
-             h->prev_poc_msb=
-             h->prev_poc_lsb= 0;
-        }
-
         if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
             h->poc_msb = h->prev_poc_msb + max_poc_lsb;
         else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
@@ -3745,29 +3612,20 @@ static int init_poc(H264Context *h){
         if(s->picture_structure == PICT_FRAME)
             field_poc[1] += h->delta_poc[1];
     }else{
-        int poc;
-        if(h->nal_unit_type == NAL_IDR_SLICE){
-            poc= 0;
-        }else{
-            if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
-            else               poc= 2*(h->frame_num_offset + h->frame_num) - 1;
-        }
+        int poc= 2*(h->frame_num_offset + h->frame_num);
+
+        if(!h->nal_ref_idc)
+            poc--;
+
         field_poc[0]= poc;
         field_poc[1]= poc;
     }
 
-    if(s->picture_structure != PICT_BOTTOM_FIELD) {
+    if(s->picture_structure != PICT_BOTTOM_FIELD)
         s->current_picture_ptr->field_poc[0]= field_poc[0];
-        s->current_picture_ptr->poc = field_poc[0];
-    }
-    if(s->picture_structure != PICT_TOP_FIELD) {
+    if(s->picture_structure != PICT_TOP_FIELD)
         s->current_picture_ptr->field_poc[1]= field_poc[1];
-        s->current_picture_ptr->poc = field_poc[1];
-    }
-    if(!FIELD_PICTURE || !s->first_field) {
-        Picture *cur = s->current_picture_ptr;
-        cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
-    }
+    cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
 
     return 0;
 }
@@ -3865,7 +3723,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
     unsigned int pps_id;
     int num_ref_idx_active_override_flag;
     static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
-    unsigned int slice_type, tmp, i;
+    unsigned int slice_type, tmp, i, j;
     int default_ref_list_done = 0;
     int last_pic_structure;
 
@@ -3904,8 +3762,9 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
         default_ref_list_done = 1;
     }
     h->slice_type= slice_type;
+    h->slice_type_nos= slice_type & 3;
 
-    s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
+    s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
     if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
         av_log(h->s.avctx, AV_LOG_ERROR,
                "B picture before any references, skipping\n");
@@ -3918,13 +3777,13 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
         return -1;
     }
     if(!h0->pps_buffers[pps_id]) {
-        av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
+        av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
         return -1;
     }
     h->pps= *h0->pps_buffers[pps_id];
 
     if(!h0->sps_buffers[h->pps.sps_id]) {
-        av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
+        av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
         return -1;
     }
     h->sps = *h0->sps_buffers[h->pps.sps_id];
@@ -4008,8 +3867,19 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
             h->mb_aff_frame = h->sps.mb_aff;
         }
     }
+    h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
 
     if(h0->current_slice == 0){
+        while(h->frame_num !=  h->prev_frame_num &&
+              h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
+            av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
+            frame_start(h);
+            h->prev_frame_num++;
+            h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
+            s->current_picture_ptr->frame_num= h->prev_frame_num;
+            execute_ref_pic_marking(h, NULL, 0);
+        }
+
         /* See if we have a decoded first field looking for a pair... */
         if (s0->first_field) {
             assert(s0->current_picture_ptr);
@@ -4105,21 +3975,19 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
         h->redundant_pic_count= get_ue_golomb(&s->gb);
     }
 
-    //set defaults, might be overriden a few line later
+    //set defaults, might be overridden a few lines later
     h->ref_count[0]= h->pps.ref_count[0];
     h->ref_count[1]= h->pps.ref_count[1];
 
-    if(h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE || h->slice_type == FF_B_TYPE){
-        if(h->slice_type == FF_B_TYPE){
+    if(h->slice_type_nos != FF_I_TYPE){
+        if(h->slice_type_nos == FF_B_TYPE){
             h->direct_spatial_mv_pred= get_bits1(&s->gb);
-            if(FIELD_PICTURE && h->direct_spatial_mv_pred)
-                av_log(h->s.avctx, AV_LOG_ERROR, "PAFF + spatial direct mode is not implemented\n");
         }
         num_ref_idx_active_override_flag= get_bits1(&s->gb);
 
         if(num_ref_idx_active_override_flag){
             h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
-            if(h->slice_type==FF_B_TYPE)
+            if(h->slice_type_nos==FF_B_TYPE)
                 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
 
             if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
@@ -4128,7 +3996,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
                 return -1;
             }
         }
-        if(h->slice_type == FF_B_TYPE)
+        if(h->slice_type_nos == FF_B_TYPE)
             h->list_count= 2;
         else
             h->list_count= 1;
@@ -4139,13 +4007,13 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
         fill_default_ref_list(h);
     }
 
-    if(decode_ref_pic_list_reordering(h) < 0)
+    if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
         return -1;
 
-    if(   (h->pps.weighted_pred          && (h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE ))
-       || (h->pps.weighted_bipred_idc==1 && h->slice_type==FF_B_TYPE ) )
+    if(   (h->pps.weighted_pred          && h->slice_type_nos == FF_P_TYPE )
+       ||  (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
         pred_weight_table(h);
-    else if(h->pps.weighted_bipred_idc==2 && h->slice_type==FF_B_TYPE)
+    else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
         implicit_weight_table(h);
     else
         h->use_weight = 0;
@@ -4156,7 +4024,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
     if(FRAME_MBAFF)
         fill_mbaff_ref_list(h);
 
-    if( h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE && h->pps.cabac ){
+    if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
         tmp = get_ue_golomb(&s->gb);
         if(tmp > 2){
             av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
@@ -4202,8 +4070,8 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
     }
 
     if(   s->avctx->skip_loop_filter >= AVDISCARD_ALL
-       ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != FF_I_TYPE)
-       ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  && h->slice_type == FF_B_TYPE)
+       ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
+       ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  && h->slice_type_nos == FF_B_TYPE)
        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
         h->deblocking_filter= 0;
 
@@ -4231,6 +4099,15 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
     h0->last_slice_type = slice_type;
     h->slice_num = ++h0->current_slice;
 
+    for(j=0; j<2; j++){
+        int *ref2frm= h->ref2frm[h->slice_num&15][j];
+        ref2frm[0]=
+        ref2frm[1]= -1;
+        for(i=0; i<48; i++)
+            ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
+                          +(h->ref_list[j][i].reference&3);
+    }
+
     h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
     h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
 
@@ -4351,12 +4228,11 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
                 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
             else
                 level_code= prefix + get_bits(gb, 4); //part
-        }else if(prefix==15){
-            level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
-            if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
         }else{
-            av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
-            return -1;
+            level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
+            if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
+            if(prefix>=16)
+                level_code += (1<<(prefix-3))-4096;
         }
 
         if(trailing_ones < 3) level_code += 2;
@@ -4374,11 +4250,10 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
             prefix = get_level_prefix(gb);
             if(prefix<15){
                 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
-            }else if(prefix==15){
-                level_code =  (prefix<<suffix_length) + get_bits(gb, 12);
             }else{
-                av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
-                return -1;
+                level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
+                if(prefix>=16)
+                    level_code += (1<<(prefix-3))-4096;
             }
             mask= -(level_code&1);
             level[i]= (((2+level_code)>>1) ^ mask) - mask;
@@ -4465,7 +4340,7 @@ static void decode_mb_skip(H264Context *h){
     if(MB_FIELD)
         mb_type|= MB_TYPE_INTERLACED;
 
-    if( h->slice_type == FF_B_TYPE )
+    if( h->slice_type_nos == FF_B_TYPE )
     {
         // just for fill_caches. pred_direct_motion will set the real mb_type
         mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
@@ -4494,7 +4369,7 @@ static void decode_mb_skip(H264Context *h){
 
 /**
  * decodes a macroblock
- * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
+ * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
  */
 static int decode_mb_cavlc(H264Context *h){
     MpegEncContext * const s = &h->s;
@@ -4510,7 +4385,7 @@ static int decode_mb_cavlc(H264Context *h){
     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
                 down the code */
-    if(h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE){
+    if(h->slice_type_nos != FF_I_TYPE){
         if(s->mb_skip_run==-1)
             s->mb_skip_run= get_ue_golomb(&s->gb);
 
@@ -4528,13 +4403,12 @@ static int decode_mb_cavlc(H264Context *h){
     if(FRAME_MBAFF){
         if( (s->mb_y&1) == 0 )
             h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
-    }else
-        h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
+    }
 
     h->prev_mb_skipped= 0;
 
     mb_type= get_ue_golomb(&s->gb);
-    if(h->slice_type == FF_B_TYPE){
+    if(h->slice_type_nos == FF_B_TYPE){
         if(mb_type < 23){
             partition_count= b_mb_type_info[mb_type].partition_count;
             mb_type=         b_mb_type_info[mb_type].type;
@@ -4542,7 +4416,7 @@ static int decode_mb_cavlc(H264Context *h){
             mb_type -= 23;
             goto decode_intra_mb;
         }
-    }else if(h->slice_type == FF_P_TYPE /*|| h->slice_type == FF_SP_TYPE */){
+    }else if(h->slice_type_nos == FF_P_TYPE){
         if(mb_type < 5){
             partition_count= p_mb_type_info[mb_type].partition_count;
             mb_type=         p_mb_type_info[mb_type].type;
@@ -4551,7 +4425,9 @@ static int decode_mb_cavlc(H264Context *h){
             goto decode_intra_mb;
         }
     }else{
-       assert(h->slice_type == FF_I_TYPE);
+       assert(h->slice_type_nos == FF_I_TYPE);
+        if(h->slice_type == FF_SI_TYPE && mb_type)
+            mb_type--;
 decode_intra_mb:
         if(mb_type > 25){
             av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
@@ -4569,38 +4445,18 @@ decode_intra_mb:
     h->slice_table[ mb_xy ]= h->slice_num;
 
     if(IS_INTRA_PCM(mb_type)){
-        unsigned int x, y;
+        unsigned int x;
 
         // We assume these blocks are very rare so we do not optimize it.
         align_get_bits(&s->gb);
 
         // The pixels are stored in the same order as levels in h->mb array.
-        for(y=0; y<16; y++){
-            const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
-            for(x=0; x<16; x++){
-                tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
-                h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
-            }
-        }
-        for(y=0; y<8; y++){
-            const int index= 256 + 4*(y&3) + 32*(y>>2);
-            for(x=0; x<8; x++){
-                tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
-                h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
-            }
-        }
-        for(y=0; y<8; y++){
-            const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
-            for(x=0; x<8; x++){
-                tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
-                h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
-            }
+        for(x=0; x < (CHROMA ? 384 : 256); x++){
+            ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
         }
 
         // In deblocking, the quantizer is 0
         s->current_picture.qscale_table[mb_xy]= 0;
-        h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
-        h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
         // All coeffs are present
         memset(h->non_zero_count[mb_xy], 16, 16);
 
@@ -4617,47 +4473,48 @@ decode_intra_mb:
 
     //mb_pred
     if(IS_INTRA(mb_type)){
-            int pred_mode;
+        int pred_mode;
 //            init_top_left_availability(h);
-            if(IS_INTRA4x4(mb_type)){
-                int i;
-                int di = 1;
-                if(dct8x8_allowed && get_bits1(&s->gb)){
-                    mb_type |= MB_TYPE_8x8DCT;
-                    di = 4;
-                }
+        if(IS_INTRA4x4(mb_type)){
+            int i;
+            int di = 1;
+            if(dct8x8_allowed && get_bits1(&s->gb)){
+                mb_type |= MB_TYPE_8x8DCT;
+                di = 4;
+            }
 
 //                fill_intra4x4_pred_table(h);
-                for(i=0; i<16; i+=di){
-                    int mode= pred_intra_mode(h, i);
+            for(i=0; i<16; i+=di){
+                int mode= pred_intra_mode(h, i);
 
-                    if(!get_bits1(&s->gb)){
-                        const int rem_mode= get_bits(&s->gb, 3);
-                        mode = rem_mode + (rem_mode >= mode);
-                    }
-
-                    if(di==4)
-                        fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
-                    else
-                        h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
+                if(!get_bits1(&s->gb)){
+                    const int rem_mode= get_bits(&s->gb, 3);
+                    mode = rem_mode + (rem_mode >= mode);
                 }
-                write_back_intra_pred_mode(h);
-                if( check_intra4x4_pred_mode(h) < 0)
-                    return -1;
-            }else{
-                h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
-                if(h->intra16x16_pred_mode < 0)
-                    return -1;
-            }
 
+                if(di==4)
+                    fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
+                else
+                    h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
+            }
+            write_back_intra_pred_mode(h);
+            if( check_intra4x4_pred_mode(h) < 0)
+                return -1;
+        }else{
+            h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
+            if(h->intra16x16_pred_mode < 0)
+                return -1;
+        }
+        if(CHROMA){
             pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
             if(pred_mode < 0)
                 return -1;
             h->chroma_pred_mode= pred_mode;
+        }
     }else if(partition_count==4){
         int i, j, sub_partition_count[4], list, ref[2][4];
 
-        if(h->slice_type == FF_B_TYPE){
+        if(h->slice_type_nos == FF_B_TYPE){
             for(i=0; i<4; i++){
                 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
                 if(h->sub_mb_type[i] >=13){
@@ -4676,7 +4533,7 @@ decode_intra_mb:
                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
             }
         }else{
-            assert(h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE); //FIXME SP correct ?
+            assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
             for(i=0; i<4; i++){
                 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
                 if(h->sub_mb_type[i] >=4){
@@ -4859,16 +4716,21 @@ decode_intra_mb:
             return -1;
         }
 
-        if(IS_INTRA4x4(mb_type))
-            cbp= golomb_to_intra4x4_cbp[cbp];
-        else
-            cbp= golomb_to_inter_cbp[cbp];
+        if(CHROMA){
+            if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
+            else                     cbp= golomb_to_inter_cbp   [cbp];
+        }else{
+            if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
+            else                     cbp= golomb_to_inter_cbp_gray[cbp];
+        }
     }
     h->cbp = cbp;
 
     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
-        if(get_bits1(&s->gb))
+        if(get_bits1(&s->gb)){
             mb_type |= MB_TYPE_8x8DCT;
+            h->cbp_table[mb_xy]= cbp;
+        }
     }
     s->current_picture.mb_type[mb_xy]= mb_type;
 
@@ -5047,9 +4909,9 @@ static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_sl
 static int decode_cabac_mb_type( H264Context *h ) {
     MpegEncContext * const s = &h->s;
 
-    if( h->slice_type == FF_I_TYPE ) {
+    if( h->slice_type_nos == FF_I_TYPE ) {
         return decode_cabac_intra_mb_type(h, 3, 1);
-    } else if( h->slice_type == FF_P_TYPE ) {
+    } else if( h->slice_type_nos == FF_P_TYPE ) {
         if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
             /* P-type */
             if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
@@ -5062,7 +4924,7 @@ static int decode_cabac_mb_type( H264Context *h ) {
         } else {
             return decode_cabac_intra_mb_type(h, 17, 0) + 5;
         }
-    } else if( h->slice_type == FF_B_TYPE ) {
+    } else if( h->slice_type_nos == FF_B_TYPE ) {
         const int mba_xy = h->left_mb_xy[0];
         const int mbb_xy = h->top_mb_xy;
         int ctx = 0;
@@ -5132,7 +4994,7 @@ static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
     if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
         ctx++;
 
-    if( h->slice_type == FF_B_TYPE )
+    if( h->slice_type_nos == FF_B_TYPE )
         ctx += 13;
     return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
 }
@@ -5269,7 +5131,7 @@ static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
     int ref  = 0;
     int ctx  = 0;
 
-    if( h->slice_type == FF_B_TYPE) {
+    if( h->slice_type_nos == FF_B_TYPE) {
         if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
             ctx++;
         if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
@@ -5342,22 +5204,22 @@ static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx,
     int ctx = 0;
 
     if( is_dc ) {
-    if( cat == 0 ) {
-        nza = h->left_cbp&0x100;
-        nzb = h-> top_cbp&0x100;
-    } else {
-        nza = (h->left_cbp>>(6+idx))&0x01;
-        nzb = (h-> top_cbp>>(6+idx))&0x01;
-    }
-    } else {
-    if( cat == 1 || cat == 2 ) {
-        nza = h->non_zero_count_cache[scan8[idx] - 1];
-        nzb = h->non_zero_count_cache[scan8[idx] - 8];
+        if( cat == 0 ) {
+            nza = h->left_cbp&0x100;
+            nzb = h-> top_cbp&0x100;
+        } else {
+            nza = (h->left_cbp>>(6+idx))&0x01;
+            nzb = (h-> top_cbp>>(6+idx))&0x01;
+        }
     } else {
-        assert(cat == 4);
-        nza = h->non_zero_count_cache[scan8[16+idx] - 1];
-        nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
-    }
+        if( cat == 4 ) {
+            nza = h->non_zero_count_cache[scan8[16+idx] - 1];
+            nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
+        } else {
+            assert(cat == 1 || cat == 2);
+            nza = h->non_zero_count_cache[scan8[idx] - 1];
+            nzb = h->non_zero_count_cache[scan8[idx] - 8];
+        }
     }
 
     if( nza > 0 )
@@ -5447,10 +5309,10 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
     if( is_dc || cat != 5 ) {
         if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
             if( !is_dc ) {
-            if( cat == 1 || cat == 2 )
-                h->non_zero_count_cache[scan8[n]] = 0;
-            else
-                h->non_zero_count_cache[scan8[16+n]] = 0;
+                if( cat == 4 )
+                    h->non_zero_count_cache[scan8[16+n]] = 0;
+                else
+                    h->non_zero_count_cache[scan8[n]] = 0;
             }
 
 #ifdef CABAC_ON_STACK
@@ -5499,25 +5361,25 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
     assert(coeff_count > 0);
 
     if( is_dc ) {
-    if( cat == 0 )
-        h->cbp_table[h->mb_xy] |= 0x100;
-    else
-        h->cbp_table[h->mb_xy] |= 0x40 << n;
+        if( cat == 0 )
+            h->cbp_table[h->mb_xy] |= 0x100;
+        else
+            h->cbp_table[h->mb_xy] |= 0x40 << n;
     } else {
-    if( cat == 1 || cat == 2 )
-        h->non_zero_count_cache[scan8[n]] = coeff_count;
-    else if( cat == 4 )
-        h->non_zero_count_cache[scan8[16+n]] = coeff_count;
-    else {
-        assert( cat == 5 );
-        fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
-    }
+        if( cat == 5 )
+            fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
+        else if( cat == 4 )
+            h->non_zero_count_cache[scan8[16+n]] = coeff_count;
+        else {
+            assert( cat == 1 || cat == 2 );
+            h->non_zero_count_cache[scan8[n]] = coeff_count;
+        }
     }
 
-    for( coeff_count--; coeff_count >= 0; coeff_count-- ) {
+    do {
         uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
 
-        int j= scantable[index[coeff_count]];
+        int j= scantable[index[--coeff_count]];
 
         if( get_cabac( CC, ctx ) == 0 ) {
             node_ctx = coeff_abs_level_transition[0][node_ctx];
@@ -5549,14 +5411,12 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
             }
 
             if( is_dc ) {
-                if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs;
-                else                                block[j] =  coeff_abs;
+                block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
             }else{
-                if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
-                else                                block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
+                block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
             }
         }
-    }
+    } while( coeff_count );
 #ifdef CABAC_ON_STACK
             h->cabac.range     = cc.range     ;
             h->cabac.low       = cc.low       ;
@@ -5614,7 +5474,7 @@ static inline void compute_mb_neighbors(H264Context *h)
 
 /**
  * decodes a macroblock
- * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
+ * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
  */
 static int decode_mb_cabac(H264Context *h) {
     MpegEncContext * const s = &h->s;
@@ -5627,7 +5487,7 @@ static int decode_mb_cabac(H264Context *h) {
     s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
 
     tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
-    if( h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE ) {
+    if( h->slice_type_nos != FF_I_TYPE ) {
         int skip;
         /* a skipped mb needs the aff flag from the following mb */
         if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
@@ -5661,8 +5521,7 @@ static int decode_mb_cabac(H264Context *h) {
         if( (s->mb_y&1) == 0 )
             h->mb_mbaff =
             h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
-    }else
-        h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
+    }
 
     h->prev_mb_skipped = 0;
 
@@ -5672,7 +5531,7 @@ static int decode_mb_cabac(H264Context *h) {
         return -1;
     }
 
-    if( h->slice_type == FF_B_TYPE ) {
+    if( h->slice_type_nos == FF_B_TYPE ) {
         if( mb_type < 23 ){
             partition_count= b_mb_type_info[mb_type].partition_count;
             mb_type=         b_mb_type_info[mb_type].type;
@@ -5680,7 +5539,7 @@ static int decode_mb_cabac(H264Context *h) {
             mb_type -= 23;
             goto decode_intra_mb;
         }
-    } else if( h->slice_type == FF_P_TYPE ) {
+    } else if( h->slice_type_nos == FF_P_TYPE ) {
         if( mb_type < 5) {
             partition_count= p_mb_type_info[mb_type].partition_count;
             mb_type=         p_mb_type_info[mb_type].type;
@@ -5689,7 +5548,9 @@ static int decode_mb_cabac(H264Context *h) {
             goto decode_intra_mb;
         }
     } else {
-       assert(h->slice_type == FF_I_TYPE);
+        if(h->slice_type == FF_SI_TYPE && mb_type)
+            mb_type--;
+        assert(h->slice_type_nos == FF_I_TYPE);
 decode_intra_mb:
         partition_count = 0;
         cbp= i_mb_type_info[mb_type].cbp;
@@ -5703,7 +5564,6 @@ decode_intra_mb:
 
     if(IS_INTRA_PCM(mb_type)) {
         const uint8_t *ptr;
-        unsigned int x, y;
 
         // We assume these blocks are very rare so we do not optimize it.
         // FIXME The two following lines get the bitstream position in the cabac
@@ -5715,26 +5575,9 @@ decode_intra_mb:
         }
 
         // The pixels are stored in the same order as levels in h->mb array.
-        for(y=0; y<16; y++){
-            const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
-            for(x=0; x<16; x++){
-                tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
-                h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
-            }
-        }
-        for(y=0; y<8; y++){
-            const int index= 256 + 4*(y&3) + 32*(y>>2);
-            for(x=0; x<8; x++){
-                tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
-                h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
-            }
-        }
-        for(y=0; y<8; y++){
-            const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
-            for(x=0; x<8; x++){
-                tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
-                h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
-            }
+        memcpy(h->mb, ptr, 256); ptr+=256;
+        if(CHROMA){
+            memcpy(h->mb+128, ptr, 128); ptr+=128;
         }
 
         ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
@@ -5744,11 +5587,10 @@ decode_intra_mb:
         h->chroma_pred_mode_table[mb_xy] = 0;
         // In deblocking, the quantizer is 0
         s->current_picture.qscale_table[mb_xy]= 0;
-        h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
-        h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
         // All coeffs are present
         memset(h->non_zero_count[mb_xy], 16, 16);
         s->current_picture.mb_type[mb_xy]= mb_type;
+        h->last_qscale_diff = 0;
         return 0;
     }
 
@@ -5783,16 +5625,18 @@ decode_intra_mb:
             h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
             if( h->intra16x16_pred_mode < 0 ) return -1;
         }
-        h->chroma_pred_mode_table[mb_xy] =
-        pred_mode                        = decode_cabac_mb_chroma_pre_mode( h );
+        if(CHROMA){
+            h->chroma_pred_mode_table[mb_xy] =
+            pred_mode                        = decode_cabac_mb_chroma_pre_mode( h );
 
-        pred_mode= check_intra_pred_mode( h, pred_mode );
-        if( pred_mode < 0 ) return -1;
-        h->chroma_pred_mode= pred_mode;
+            pred_mode= check_intra_pred_mode( h, pred_mode );
+            if( pred_mode < 0 ) return -1;
+            h->chroma_pred_mode= pred_mode;
+        }
     } else if( partition_count == 4 ) {
         int i, j, sub_partition_count[4], list, ref[2][4];
 
-        if( h->slice_type == FF_B_TYPE ) {
+        if( h->slice_type_nos == FF_B_TYPE ) {
             for( i = 0; i < 4; i++ ) {
                 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
                 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
@@ -5990,7 +5834,8 @@ decode_intra_mb:
 
     if( !IS_INTRA16x16( mb_type ) ) {
         cbp  = decode_cabac_mb_cbp_luma( h );
-        cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
+        if(CHROMA)
+            cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
     }
 
     h->cbp_table[mb_xy] = h->cbp = cbp;
@@ -6423,6 +6268,7 @@ static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
     mb_xy = h->mb_xy;
 
     if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
+1 ||
        (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
                                       h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
         filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
@@ -6490,7 +6336,7 @@ static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
             int step = IS_8x8DCT(mb_type) ? 2 : 1;
             edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
             s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
-                                              (h->slice_type == FF_B_TYPE), edges, step, mask_edge0, mask_edge1 );
+                                              (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
         }
         if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
             bSv[0][0] = 0x0004000400040004ULL;
@@ -6534,11 +6380,6 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
     const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
     int first_vertical_edge_done = 0;
     int dir;
-    /* FIXME: A given frame may occupy more than one position in
-     * the reference list. So ref2frm should be populated with
-     * frame numbers, not indexes. */
-    static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
-                                    16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
 
     //for sufficiently low qp, filtering wouldn't do anything
     //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
@@ -6552,6 +6393,43 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
         }
     }
 
+    // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
+    if(!h->pps.cabac && h->pps.transform_8x8_mode){
+        int top_type, left_type[2];
+        top_type     = s->current_picture.mb_type[h->top_mb_xy]    ;
+        left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
+        left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
+
+        if(IS_8x8DCT(top_type)){
+            h->non_zero_count_cache[4+8*0]=
+            h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
+            h->non_zero_count_cache[6+8*0]=
+            h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
+        }
+        if(IS_8x8DCT(left_type[0])){
+            h->non_zero_count_cache[3+8*1]=
+            h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
+        }
+        if(IS_8x8DCT(left_type[1])){
+            h->non_zero_count_cache[3+8*3]=
+            h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
+        }
+
+        if(IS_8x8DCT(mb_type)){
+            h->non_zero_count_cache[scan8[0   ]]= h->non_zero_count_cache[scan8[1   ]]=
+            h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= h->cbp_table[mb_xy] & 1;
+
+            h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
+            h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp_table[mb_xy] & 2;
+
+            h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
+            h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp_table[mb_xy] & 4;
+
+            h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
+            h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp_table[mb_xy] & 8;
+        }
+    }
+
     if (FRAME_MBAFF
             // left mb is in picture
             && h->slice_table[mb_xy-1] != 255
@@ -6616,6 +6494,8 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
         int edge;
         const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
         const int mbm_type = s->current_picture.mb_type[mbm_xy];
+        int (*ref2frm) [48+2] = h->ref2frm[ h->slice_num          &15 ];
+        int (*ref2frmm)[48+2] = h->ref2frm[ h->slice_table[mbm_xy]&15 ];
         int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
 
         const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
@@ -6684,6 +6564,7 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
             /* mbn_xy: neighbor macroblock */
             const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
             const int mbn_type = s->current_picture.mb_type[mbn_xy];
+            int (*ref2frmn)[48+2] = edge > 0 ? ref2frm : ref2frmm;
             int16_t bS[4];
             int qp;
 
@@ -6721,11 +6602,23 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
                     int b_idx= 8 + 4 + edge * (dir ? 8:1);
                     int bn_idx= b_idx - (dir ? 8:1);
                     int v = 0;
-                    for( l = 0; !v && l < 1 + (h->slice_type == FF_B_TYPE); l++ ) {
-                        v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
+
+                    for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
+                        v |= ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[l][h->ref_cache[l][bn_idx]+2] ||
                              FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
                              FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
                     }
+
+                    if(h->slice_type_nos == FF_B_TYPE && v){
+                        v=0;
+                        for( l = 0; !v && l < 2; l++ ) {
+                            int ln= 1-l;
+                            v |= ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
+                                FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
+                                FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
+                        }
+                    }
+
                     bS[0] = bS[1] = bS[2] = bS[3] = v;
                     mv_done = 1;
                 }
@@ -6745,14 +6638,27 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
                     else if(!mv_done)
                     {
                         bS[i] = 0;
-                        for( l = 0; l < 1 + (h->slice_type == FF_B_TYPE); l++ ) {
-                            if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
+                        for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
+                            if( ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[l][h->ref_cache[l][bn_idx]+2] ||
                                 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
                                 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
                                 bS[i] = 1;
                                 break;
                             }
                         }
+
+                        if(h->slice_type_nos == FF_B_TYPE && bS[i]){
+                            bS[i] = 0;
+                            for( l = 0; l < 2; l++ ) {
+                                int ln= 1-l;
+                                if( ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
+                                    FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
+                                    FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
+                                    bS[i] = 1;
+                                    break;
+                                }
+                            }
+                        }
                     }
                 }
 
@@ -6808,7 +6714,7 @@ static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
         /* calculate pre-state */
         for( i= 0; i < 460; i++ ) {
             int pre;
-            if( h->slice_type == FF_I_TYPE )
+            if( h->slice_type_nos == FF_I_TYPE )
                 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
             else
                 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
@@ -7214,14 +7120,17 @@ static inline int decode_seq_parameter_set(H264Context *h){
     sps->level_idc= level_idc;
 
     if(sps->profile_idc >= 100){ //high profile
-        if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
+        sps->chroma_format_idc= get_ue_golomb(&s->gb);
+        if(sps->chroma_format_idc == 3)
             get_bits1(&s->gb);  //residual_color_transform_flag
         get_ue_golomb(&s->gb);  //bit_depth_luma_minus8
         get_ue_golomb(&s->gb);  //bit_depth_chroma_minus8
         sps->transform_bypass = get_bits1(&s->gb);
         decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
-    }else
+    }else{
         sps->scaling_matrix_present = 0;
+        sps->chroma_format_idc= 1;
+    }
 
     sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
     sps->poc_type= get_ue_golomb(&s->gb);
@@ -7303,7 +7212,7 @@ static inline int decode_seq_parameter_set(H264Context *h){
         decode_vui_parameters(h, sps);
 
     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
-        av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
+        av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
                sps_id, sps->profile_idc, sps->level_idc,
                sps->poc_type,
                sps->ref_frame_count,
@@ -7312,7 +7221,8 @@ static inline int decode_seq_parameter_set(H264Context *h){
                sps->direct_8x8_inference_flag ? "8B8" : "",
                sps->crop_left, sps->crop_right,
                sps->crop_top, sps->crop_bottom,
-               sps->vui_parameters_present_flag ? "VUI" : ""
+               sps->vui_parameters_present_flag ? "VUI" : "",
+               (unsigned)sps->chroma_format_idc < 4 ? ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc] : "?" /* guard: an idc outside 0..3 must not index past the 4-entry name table */
                );
     }
     return 0;
@@ -7322,8 +7232,8 @@ static void
 build_qp_table(PPS *pps, int t, int index)
 {
     int i;
-    for(i = 0; i < 255; i++)
-        pps->chroma_qp_table[t][i & 0xff] = chroma_qp[av_clip(i + index, 0, 51)];
+    for(i = 0; i < 52; i++)
+        pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
 }
 
 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
@@ -7413,11 +7323,9 @@ static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
     }
 
     build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
-    if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1]) {
-        build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
-        h->pps.chroma_qp_diff= 1;
-    } else
-        memcpy(pps->chroma_qp_table[1], pps->chroma_qp_table[0], 256);
+    build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
+    if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
+        pps->chroma_qp_diff= 1; /* record the flag on the PPS whose offsets were just compared, not on h->pps */
 
     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
         av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
@@ -7574,8 +7482,8 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
             s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
             if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
-               && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type!=FF_B_TYPE)
-               && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==FF_I_TYPE)
+               && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
+               && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
                && avctx->skip_frame < AVDISCARD_ALL)
                 context_count++;
             break;
@@ -7599,8 +7507,8 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
                && s->context_initialized
                && s->hurry_up < 5
                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
-               && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type!=FF_B_TYPE)
-               && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==FF_I_TYPE)
+               && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=FF_B_TYPE)
+               && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
                && avctx->skip_frame < AVDISCARD_ALL)
                 context_count++;
             break;
@@ -7645,7 +7553,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
         else if(err == 1) {
             /* Slice could not be decoded in parallel mode, copy down
              * NAL unit stuff to context 0 and restart. Note that
-             * rbsp_buffer is not transfered, but since we no longer
+             * rbsp_buffer is not transferred, but since we no longer
              * run in parallel mode this should not be an issue. */
             h->nal_unit_type = hx->nal_unit_type;
             h->nal_ref_idc   = hx->nal_ref_idc;
@@ -7662,17 +7570,10 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
  * returns the number of bytes consumed for building the current frame
  */
 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
-    if(s->flags&CODEC_FLAG_TRUNCATED){
-        pos -= s->parse_context.last_index;
-        if(pos<0) pos=0; // FIXME remove (unneeded?)
-
-        return pos;
-    }else{
         if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
         if(pos+10>buf_size) pos=buf_size; // oops ;)
 
         return pos;
-    }
 }
 
 static int decode_frame(AVCodecContext *avctx,
@@ -7687,16 +7588,7 @@ static int decode_frame(AVCodecContext *avctx,
     s->flags= avctx->flags;
     s->flags2= avctx->flags2;
 
-    if(s->flags&CODEC_FLAG_TRUNCATED){
-        const int next= ff_h264_find_frame_end(h, buf, buf_size);
-        assert((buf_size > 0) || (next == END_NOT_FOUND));
-
-        if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
-          return buf_size;
-//printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
-    }
-
-   /* no supplementary picture */
+   /* end of stream, output what is still in the buffers */
     if (buf_size == 0) {
         Picture *out;
         int i, out_idx;
@@ -7704,7 +7596,7 @@ static int decode_frame(AVCodecContext *avctx,
 //FIXME factorize this with the output code below
         out = h->delayed_pic[0];
         out_idx = 0;
-        for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
+        for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
             if(h->delayed_pic[i]->poc < out->poc){
                 out = h->delayed_pic[i];
                 out_idx = i;
@@ -7780,7 +7672,6 @@ static int decode_frame(AVCodecContext *avctx,
     if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
         Picture *out = s->current_picture_ptr;
         Picture *cur = s->current_picture_ptr;
-        Picture *prev = h->delayed_output_pic;
         int i, pics, cross_idr, out_of_order, out_idx;
 
         s->mb_y= 0;
@@ -7788,13 +7679,13 @@ static int decode_frame(AVCodecContext *avctx,
         s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
         s->current_picture_ptr->pict_type= s->pict_type;
 
-        h->prev_frame_num_offset= h->frame_num_offset;
-        h->prev_frame_num= h->frame_num;
         if(!s->dropable) {
+            execute_ref_pic_marking(h, h->mmco, h->mmco_index);
             h->prev_poc_msb= h->poc_msb;
             h->prev_poc_lsb= h->poc_lsb;
-            execute_ref_pic_marking(h, h->mmco, h->mmco_index);
         }
+        h->prev_frame_num_offset= h->frame_num_offset;
+        h->prev_frame_num= h->frame_num;
 
         /*
          * FIXME: Error handling code does not seem to support interlaced
@@ -7813,7 +7704,7 @@ static int decode_frame(AVCodecContext *avctx,
 
         MPV_frame_end(s);
 
-        if (s->first_field) {
+        if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
             /* Wait for second field. */
             *data_size = 0;
 
@@ -7824,9 +7715,6 @@ static int decode_frame(AVCodecContext *avctx,
 
         //FIXME do something with unavailable reference frames
 
-#if 0 //decode order
-            *data_size = sizeof(AVFrame);
-#else
             /* Sort B-frames into display order */
 
             if(h->sps.bitstream_restriction_flag
@@ -7835,63 +7723,56 @@ static int decode_frame(AVCodecContext *avctx,
                 s->low_delay = 0;
             }
 
+            if(   s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
+               && !h->sps.bitstream_restriction_flag){
+                s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
+                s->low_delay= 0;
+            }
+
             pics = 0;
             while(h->delayed_pic[pics]) pics++;
 
-            assert(pics+1 < sizeof(h->delayed_pic) / sizeof(h->delayed_pic[0]));
+            assert(pics <= MAX_DELAYED_PIC_COUNT);
 
             h->delayed_pic[pics++] = cur;
             if(cur->reference == 0)
                 cur->reference = DELAYED_PIC_REF;
 
-            cross_idr = 0;
-            for(i=0; h->delayed_pic[i]; i++)
-                if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
-                    cross_idr = 1;
-
             out = h->delayed_pic[0];
             out_idx = 0;
-            for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
+            for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
                 if(h->delayed_pic[i]->poc < out->poc){
                     out = h->delayed_pic[i];
                     out_idx = i;
                 }
+            cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i];
+
+            out_of_order = !cross_idr && out->poc < h->outputed_poc;
 
-            out_of_order = !cross_idr && prev && out->poc < prev->poc;
             if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
                 { }
-            else if(prev && pics <= s->avctx->has_b_frames)
-                out = prev;
-            else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
+            else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
                || (s->low_delay &&
-                ((!cross_idr && prev && out->poc > prev->poc + 2)
+                ((!cross_idr && out->poc > h->outputed_poc + 2)
                  || cur->pict_type == FF_B_TYPE)))
             {
                 s->low_delay = 0;
                 s->avctx->has_b_frames++;
-                out = prev;
             }
-            else if(out_of_order)
-                out = prev;
 
             if(out_of_order || pics > s->avctx->has_b_frames){
+                out->reference &= ~DELAYED_PIC_REF;
                 for(i=out_idx; h->delayed_pic[i]; i++)
                     h->delayed_pic[i] = h->delayed_pic[i+1];
             }
-
-            if(prev == out)
-                *data_size = 0;
-            else
+            if(!out_of_order && pics > s->avctx->has_b_frames){
                 *data_size = sizeof(AVFrame);
-            if(prev && prev != out && prev->reference == DELAYED_PIC_REF)
-                prev->reference = 0;
-            h->delayed_output_pic = out;
-#endif
 
-            if(out)
+                h->outputed_poc = out->poc;
                 *pict= *(AVFrame*)out;
-            else
+            }else{
                 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
+            }
         }
     }
 
@@ -8127,7 +8008,7 @@ AVCodec h264_decoder = {
     NULL,
     decode_end,
     decode_frame,
-    /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
+    /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
     .flush= flush_dpb,
     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
 };