allow 16 refs with B-frames

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 1739108d42570cb0de088d9141c823e448dba358..567f2832c566a16a8e70b676b2742dad5471847b 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -87,6 +87,8 @@ typedef struct SPS{
     uint32_t time_scale;
     int fixed_frame_rate_flag;
     short offset_for_ref_frame[256]; //FIXME dyn aloc?
+    int bitstream_restriction_flag;
+    int num_reorder_frames;
 }SPS;
 
 /**
@@ -165,6 +167,9 @@ typedef struct H264Context{
     //prediction stuff
     int chroma_pred_mode;
     int intra16x16_pred_mode;
+
+    int top_mb_xy;
+    int left_mb_xy[2];
     
     int8_t intra4x4_pred_mode_cache[5*8];
     int8_t (*intra4x4_pred_mode)[8];
@@ -175,8 +180,8 @@ typedef struct H264Context{
     unsigned int top_samples_available;
     unsigned int topright_samples_available;
     unsigned int left_samples_available;
-    uint8_t (*top_border)[16+2*8];
-    uint8_t left_border[17+2*9];
+    uint8_t (*top_borders[2])[16+2*8];
+    uint8_t left_border[2*(17+2*9)];
 
     /**
      * non zero coeff count cache.
@@ -198,8 +203,11 @@ typedef struct H264Context{
      */
     int mv_cache_clean[2];
 
-    int block_offset[16+8];
-    int chroma_subblock_offset[16]; //FIXME remove
+    /**
+     * block_offset[ 0..23] for frame macroblocks
+     * block_offset[24..47] for field macroblocks
+     */
+    int block_offset[2*(16+8)];
     
     uint16_t *mb2b_xy; //FIXME are these 4 a good idea?
     uint16_t *mb2b8_xy;
@@ -228,6 +236,7 @@ typedef struct H264Context{
     int slice_type_fixed;
     
     //interlacing specific flags
+    int mb_aff_frame;
     int mb_field_decoding_flag;
     
     int sub_mb_type[4];
@@ -274,17 +283,19 @@ typedef struct H264Context{
     
     int direct_spatial_mv_pred;
     int dist_scale_factor[16];
+    int map_col_to_list0[2][16];
 
     /**
      * num_ref_idx_l0/1_active_minus1 + 1
      */
     int ref_count[2];// FIXME split for AFF
-    Picture *short_ref[16];
-    Picture *long_ref[16];
+    Picture *short_ref[32];
+    Picture *long_ref[32];
     Picture default_ref_list[2][32];
     Picture ref_list[2][32]; //FIXME size?
     Picture field_ref_list[2][32]; //FIXME size?
     Picture *delayed_pic[16]; //FIXME size?
+    Picture *delayed_output_pic;
     
     /**
      * memory management control operations buffer.
@@ -335,7 +346,7 @@ static VLC run7_vlc;
 
 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
-static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr);
+static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
 
 static inline uint32_t pack16to32(int a, int b){
 #ifdef WORDS_BIGENDIAN
@@ -414,28 +425,94 @@ static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
     int topleft_xy, top_xy, topright_xy, left_xy[2];
     int topleft_type, top_type, topright_type, left_type[2];
-    int left_block[4];
+    int left_block[8];
     int i;
 
     //wow what a mess, why didnt they simplify the interlacing&intra stuff, i cant imagine that these complex rules are worth it 
     
-    if(h->sps.mb_aff){
-    //FIXME
-        topleft_xy = 0; /* avoid warning */
-        top_xy = 0; /* avoid warning */
-        topright_xy = 0; /* avoid warning */
-    }else{
-        topleft_xy = mb_xy-1 - s->mb_stride;
-        top_xy     = mb_xy   - s->mb_stride;
-        topright_xy= mb_xy+1 - s->mb_stride;
-        left_xy[0]   = mb_xy-1;
-        left_xy[1]   = mb_xy-1;
-        left_block[0]= 0;
-        left_block[1]= 1;
-        left_block[2]= 2;
-        left_block[3]= 3;
+    top_xy     = mb_xy  - s->mb_stride;
+    topleft_xy = top_xy - 1;
+    topright_xy= top_xy + 1;
+    left_xy[1] = left_xy[0] = mb_xy-1;
+    left_block[0]= 0;
+    left_block[1]= 1;
+    left_block[2]= 2;
+    left_block[3]= 3;
+    left_block[4]= 7;
+    left_block[5]= 10;
+    left_block[6]= 8;
+    left_block[7]= 11;
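+    // left_block[] holds the block indices used when reading data from the left
+    // neighbour; the defaults above are remapped below when the left macroblock
+    // pair is coded in a different frame/field mode than the current one (MBAFF).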
+    if(h->mb_aff_frame){
+        const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
+        const int top_pair_xy      = pair_xy     - s->mb_stride;
+        const int topleft_pair_xy  = top_pair_xy - 1;
+        const int topright_pair_xy = top_pair_xy + 1;
+        const int topleft_mb_frame_flag  = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
+        const int top_mb_frame_flag      = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
+        const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
+        const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
+        const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
+        const int bottom = (s->mb_y & 1);
+        tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
+        if (bottom
+                ? !curr_mb_frame_flag // bottom macroblock
+                : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
+                ) {
+            top_xy -= s->mb_stride;
+        }
+        if (bottom
+                ? !curr_mb_frame_flag // bottom macroblock
+                : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
+                ) {
+            topleft_xy -= s->mb_stride;
+        }
+        if (bottom
+                ? !curr_mb_frame_flag // bottom macroblock
+                : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
+                ) {
+            topright_xy -= s->mb_stride;
+        }
+        if (left_mb_frame_flag != curr_mb_frame_flag) {
+            left_xy[1] = left_xy[0] = pair_xy - 1;
+            if (curr_mb_frame_flag) {
+                if (bottom) {
+                    left_block[0]= 2;
+                    left_block[1]= 2;
+                    left_block[2]= 3;
+                    left_block[3]= 3;
+                    left_block[4]= 8;
+                    left_block[5]= 11;
+                    left_block[6]= 8;
+                    left_block[7]= 11;
+                } else {
+                    left_block[0]= 0;
+                    left_block[1]= 0;
+                    left_block[2]= 1;
+                    left_block[3]= 1;
+                    left_block[4]= 7;
+                    left_block[5]= 10;
+                    left_block[6]= 7;
+                    left_block[7]= 10;
+                }
+            } else {
+                left_xy[1] += s->mb_stride;
+                //left_block[0]= 0;
+                left_block[1]= 2;
+                left_block[2]= 0;
+                left_block[3]= 2;
+                //left_block[4]= 7;
+                left_block[5]= 10;
+                left_block[6]= 7;
+                left_block[7]= 10;
+            }
+        }
     }
 
+    if(for_deblock){
+        h->top_mb_xy = top_xy;
+        h->left_mb_xy[0] = left_xy[0];
+        h->left_mb_xy[1] = left_xy[1];
+    }
     if(for_deblock){
         topleft_type = h->slice_table[topleft_xy ] < 255 ? s->current_picture.mb_type[topleft_xy] : 0;
         top_type     = h->slice_table[top_xy     ] < 255 ? s->current_picture.mb_type[top_xy]     : 0;
@@ -482,10 +559,10 @@ static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
                 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
             }else{
                 int pred;
-                if(IS_INTRA16x16(top_type) || (IS_INTER(top_type) && !h->pps.constrained_intra_pred))
-                    pred= 2;
-                else{
+                if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
                     pred= -1;
+                else{
+                    pred= 2;
                 }
                 h->intra4x4_pred_mode_cache[4+8*0]=
                 h->intra4x4_pred_mode_cache[5+8*0]=
@@ -498,10 +575,10 @@ static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
                     h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                 }else{
                     int pred;
-                    if(IS_INTRA16x16(left_type[i]) || (IS_INTER(left_type[i]) && !h->pps.constrained_intra_pred))
-                        pred= 2;
-                    else{
+                    if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
                         pred= -1;
+                    else{
+                        pred= 2;
                     }
                     h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                     h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
@@ -521,15 +598,15 @@ static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
 */
 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
     if(top_type){
-        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][0];
-        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][1];
-        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][2];
+        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
+        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
+        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
         h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
     
-        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][7];
+        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
         h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
     
-        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][10];
+        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
         h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
         
         h->top_cbp= h->cbp_table[top_xy];
@@ -549,40 +626,30 @@ static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
         else                  h->top_cbp= 0;
     }
     
-    if(left_type[0]){
-        h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][6];
-        h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][5];
-        h->non_zero_count_cache[0+8*1]= h->non_zero_count[left_xy[0]][9]; //FIXME left_block
-        h->non_zero_count_cache[0+8*4]= h->non_zero_count[left_xy[0]][12];
-        h->left_cbp= h->cbp_table[left_xy[0]]; //FIXME interlacing
-    }else{
-        h->non_zero_count_cache[3+8*1]= 
-        h->non_zero_count_cache[3+8*2]= 
-        h->non_zero_count_cache[0+8*1]= 
-        h->non_zero_count_cache[0+8*4]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
-        
-        if(IS_INTRA(mb_type)) h->left_cbp= 0x1C0;//FIXME interlacing
-        else                  h->left_cbp= 0;
-    }
-    
-    if(left_type[1]){
-        h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[1]][4];
-        h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[1]][3];
-        h->non_zero_count_cache[0+8*2]= h->non_zero_count[left_xy[1]][8];
-        h->non_zero_count_cache[0+8*5]= h->non_zero_count[left_xy[1]][11];
-    }else{
-        h->non_zero_count_cache[3+8*3]= 
-        h->non_zero_count_cache[3+8*4]= 
-        h->non_zero_count_cache[0+8*2]= 
-        h->non_zero_count_cache[0+8*5]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
+    for (i=0; i<2; i++) {
+        if(left_type[i]){
+            h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
+            h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
+            h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
+            h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
+            h->left_cbp= h->cbp_table[left_xy[i]]; //FIXME interlacing
+        }else{
+            h->non_zero_count_cache[3+8*1 + 2*8*i]= 
+            h->non_zero_count_cache[3+8*2 + 2*8*i]= 
+            h->non_zero_count_cache[0+8*1 +   8*i]= 
+            h->non_zero_count_cache[0+8*4 +   8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
+            
+            if(IS_INTRA(mb_type)) h->left_cbp= 0x1C0;//FIXME interlacing
+            else                  h->left_cbp= 0;
+        }
     }
-    
+
 #if 1
     //FIXME direct mb can skip much of this
     if(IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)){
         int list;
         for(list=0; list<2; list++){
-            if((!IS_8X8(mb_type)) && !USES_LIST(mb_type, list) && !IS_DIRECT(mb_type)){
+            if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !for_deblock){
                 /*if(!h->mv_cache_clean[list]){
                     memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                     memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
@@ -849,21 +916,21 @@ static inline void write_back_non_zero_count(H264Context *h){
     MpegEncContext * const s = &h->s;
     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
 
-    h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[4+8*4];
-    h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[5+8*4];
-    h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[6+8*4];
+    h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
+    h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
+    h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
     h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
-    h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[7+8*3];
-    h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[7+8*2];
-    h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[7+8*1];
+    h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
+    h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
+    h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
     
-    h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[1+8*2];
+    h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
     h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
-    h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[2+8*1];
+    h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
 
-    h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[1+8*5];
+    h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
     h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
-    h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[2+8*4];
+    h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
 }
 
 /**
@@ -1060,6 +1127,34 @@ static inline void direct_dist_scale_factor(H264Context * const h){
         }
     }
 }
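+/**
+ * Stores the current picture's reference counts and POCs (so it can later serve
+ * as a co-located picture) and, for B-frames using temporal direct prediction,
+ * fills map_col_to_list0[][] by matching the POCs of the co-located picture's
+ * references against the current reference lists.
+ */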
+static inline void direct_ref_list_init(H264Context * const h){
+    MpegEncContext * const s = &h->s;
+    Picture * const ref1 = &h->ref_list[1][0];
+    Picture * const cur = s->current_picture_ptr;
+    int list, i, j;
+    if(cur->pict_type == I_TYPE)
+        cur->ref_count[0] = 0;
+    if(cur->pict_type != B_TYPE)
+        cur->ref_count[1] = 0;
+    for(list=0; list<2; list++){
+        cur->ref_count[list] = h->ref_count[list];
+        for(j=0; j<h->ref_count[list]; j++)
+            cur->ref_poc[list][j] = h->ref_list[list][j].poc;
+    }
+    if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
+        return;
+    for(list=0; list<2; list++){
+        for(i=0; i<ref1->ref_count[list]; i++){
+            const int poc = ref1->ref_poc[list][i];
+            h->map_col_to_list0[list][i] = PART_NOT_AVAILABLE;
+            for(j=0; j<h->ref_count[list]; j++)
+                if(h->ref_list[list][j].poc == poc){
+                    h->map_col_to_list0[list][i] = j;
+                    break;
+                }
+        }
+    }
+}
 
 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
     MpegEncContext * const s = &h->s;
@@ -1069,6 +1164,7 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){
     const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
     const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
     const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
+    const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
     const int is_b8x8 = IS_8X8(*mb_type);
     int sub_mb_type;
     int i8, i4;
@@ -1077,13 +1173,13 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){
         /* FIXME save sub mb types from previous frames (or derive from MVs)
          * so we know exactly what block size to use */
         sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
-        *mb_type =    MB_TYPE_8x8;
+        *mb_type =    MB_TYPE_8x8|MB_TYPE_L0L1;
     }else if(!is_b8x8 && (IS_16X16(mb_type_col) || IS_INTRA(mb_type_col))){
         sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
         *mb_type =    MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
     }else{
         sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
-        *mb_type =    MB_TYPE_8x8;
+        *mb_type =    MB_TYPE_8x8|MB_TYPE_L0L1;
     }
     if(!is_b8x8)
         *mb_type |= MB_TYPE_DIRECT2;
@@ -1178,7 +1274,6 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){
             }
         }
     }else{ /* direct temporal mv pred */
-        /* FIXME assumes that L1ref0 used the same ref lists as current frame */
         if(IS_16X16(*mb_type)){
             fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
             if(IS_INTRA(mb_type_col)){
@@ -1186,7 +1281,8 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){
                 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
                 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
             }else{
-                const int ref0 = l1ref0[0];
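+                // map the co-located block's reference index onto the current
+                // list0; if its list0 reference is unused (<0), fall back to
+                // its list1 reference instead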
+                const int ref0 = l1ref0[0] >= 0 ? h->map_col_to_list0[0][l1ref0[0]]
+                                                : h->map_col_to_list0[1][l1ref1[0]];
                 const int dist_scale_factor = h->dist_scale_factor[ref0];
                 const int16_t *mv_col = l1mv0[0];
                 int mv_l0[2];
@@ -1214,6 +1310,10 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){
                 }
     
                 ref0 = l1ref0[x8 + y8*h->b8_stride];
+                if(ref0 >= 0)
+                    ref0 = h->map_col_to_list0[0][ref0];
+                else
+                    ref0 = h->map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
                 dist_scale_factor = h->dist_scale_factor[ref0];
     
                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
@@ -1239,7 +1339,7 @@ static inline void write_back_motion(H264Context *h, int mb_type){
 
     for(list=0; list<2; list++){
         int y;
-        if((!IS_8X8(mb_type)) && !USES_LIST(mb_type, list)){
+        if(!USES_LIST(mb_type, list)){
             if(1){ //FIXME skip or never read if mb_type doesnt use it
                 for(y=0; y<4; y++){
                     *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]=
@@ -1572,9 +1672,9 @@ static void chroma_dc_dct_c(DCTELEM *block){
 /**
  * gets the chroma qp.
  */
-static inline int get_chroma_qp(H264Context *h, int qscale){
+static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
     
-    return chroma_qp[clip(qscale + h->pps.chroma_qp_index_offset, 0, 51)];
+    return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
 }
 
 
@@ -2507,7 +2607,8 @@ static void free_tables(H264Context *h){
     av_freep(&h->direct_table);
     av_freep(&h->non_zero_count);
     av_freep(&h->slice_table_base);
-    av_freep(&h->top_border);
+    av_freep(&h->top_borders[1]);
+    av_freep(&h->top_borders[0]);
     h->slice_table= NULL;
 
     av_freep(&h->mb2b_xy);
@@ -2529,7 +2630,8 @@ static int alloc_tables(H264Context *h){
 
     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
     CHECKED_ALLOCZ(h->slice_table_base  , big_mb_num * sizeof(uint8_t))
-    CHECKED_ALLOCZ(h->top_border       , s->mb_width * (16+8+8) * sizeof(uint8_t))
+    CHECKED_ALLOCZ(h->top_borders[0]    , s->mb_width * (16+8+8) * sizeof(uint8_t))
+    CHECKED_ALLOCZ(h->top_borders[1]    , s->mb_width * (16+8+8) * sizeof(uint8_t))
     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
 
     if( h->pps.cabac ) {
@@ -2595,15 +2697,12 @@ static int decode_init(AVCodecContext *avctx){
 
     decode_init_vlc(h);
     
-    if(avctx->codec_tag != 0x31637661 && avctx->codec_tag != 0x31435641) // avc1
-        h->is_avc = 0;
-    else {
-        if((avctx->extradata_size == 0) || (avctx->extradata == NULL)) {
-            av_log(avctx, AV_LOG_ERROR, "AVC codec requires avcC data\n");
-            return -1;
-        }
+    if(avctx->extradata_size > 0 && avctx->extradata &&
+       *(char *)avctx->extradata == 1){
         h->is_avc = 1;
         h->got_avcC = 0;
+    } else {
+        h->is_avc = 0;
     }
 
     return 0;
@@ -2620,11 +2719,13 @@ static void frame_start(H264Context *h){
 
     for(i=0; i<16; i++){
         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
-        h->chroma_subblock_offset[i]= 2*((scan8[i] - scan8[0])&7) + 2*s->uvlinesize*((scan8[i] - scan8[0])>>3);
+        h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
     }
     for(i=0; i<4; i++){
         h->block_offset[16+i]=
         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
+        h->block_offset[24+16+i]=
+        h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
     }
 
     /* can't be in alloc_tables because linesize isn't known there.
@@ -2643,23 +2744,25 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
     src_cb -= uvlinesize;
     src_cr -= uvlinesize;
 
-    h->left_border[0]= h->top_border[s->mb_x][15];
+    // There are two lines saved: the line above the top macroblock of a pair,
+    // and the line above the bottom macroblock.
+    h->left_border[0]= h->top_borders[0][s->mb_x][15];
     for(i=1; i<17; i++){
         h->left_border[i]= src_y[15+i*  linesize];
     }
     
-    *(uint64_t*)(h->top_border[s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
-    *(uint64_t*)(h->top_border[s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
+    *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
+    *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
 
     if(!(s->flags&CODEC_FLAG_GRAY)){
-        h->left_border[17  ]= h->top_border[s->mb_x][16+7];
-        h->left_border[17+9]= h->top_border[s->mb_x][24+7];
+        h->left_border[17  ]= h->top_borders[0][s->mb_x][16+7];
+        h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
         for(i=1; i<9; i++){
             h->left_border[i+17  ]= src_cb[7+i*uvlinesize];
             h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
         }
-        *(uint64_t*)(h->top_border[s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
-        *(uint64_t*)(h->top_border[s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
+        *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
+        *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
     }
 }
 
@@ -2687,8 +2790,8 @@ b= t;
     }
 
     if(deblock_top){
-        XCHG(*(uint64_t*)(h->top_border[s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
-        XCHG(*(uint64_t*)(h->top_border[s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
+        XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
+        XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
     }
 
     if(!(s->flags&CODEC_FLAG_GRAY)){
@@ -2699,8 +2802,93 @@ b= t;
             }
         }
         if(deblock_top){
-            XCHG(*(uint64_t*)(h->top_border[s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
-            XCHG(*(uint64_t*)(h->top_border[s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
+            XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
+            XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
+        }
+    }
+}
+
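+/**
+ * MBAFF counterpart of backup_mb_border(): saves the bottom two lines and the
+ * right-hand column of a macroblock pair into top_borders[0..1] and
+ * left_border[], so that neighbouring macroblocks can still access the
+ * unfiltered samples after the loop filter has run.
+ */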
+static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
+    MpegEncContext * const s = &h->s;
+    int i;
+    
+    src_y  -= 2 *   linesize;
+    src_cb -= 2 * uvlinesize;
+    src_cr -= 2 * uvlinesize;
+
+    // There are two lines saved: the line above the top macroblock of a pair,
+    // and the line above the bottom macroblock.
+    h->left_border[0]= h->top_borders[0][s->mb_x][15];
+    h->left_border[1]= h->top_borders[1][s->mb_x][15];
+    for(i=2; i<34; i++){
+        h->left_border[i]= src_y[15+i*  linesize];
+    }
+    
+    *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  32*linesize);
+    *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
+    *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y +  33*linesize);
+    *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
+
+    if(!(s->flags&CODEC_FLAG_GRAY)){
+        h->left_border[34     ]= h->top_borders[0][s->mb_x][16+7];
+        h->left_border[34+   1]= h->top_borders[1][s->mb_x][16+7];
+        h->left_border[34+18  ]= h->top_borders[0][s->mb_x][24+7];
+        h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
+        for(i=2; i<18; i++){
+            h->left_border[i+34   ]= src_cb[7+i*uvlinesize];
+            h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
+        }
+        *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
+        *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
+        *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
+        *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
+    }
+}
+
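+/**
+ * MBAFF counterpart of xchg_mb_border(): swaps the saved border samples
+ * (left_border[], top_borders[0..1]) with the picture content around a whole
+ * macroblock pair; called with xchg=1 before intra prediction and with xchg=0
+ * afterwards to restore the picture.
+ */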
+static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
+    MpegEncContext * const s = &h->s;
+    int temp8, i;
+    uint64_t temp64;
+    int deblock_left = (s->mb_x > 0);
+    int deblock_top  = (s->mb_y > 0);
+
+    tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
+
+    src_y  -= 2 *   linesize + 1;
+    src_cb -= 2 * uvlinesize + 1;
+    src_cr -= 2 * uvlinesize + 1;
+
+#define XCHG(a,b,t,xchg)\
+t= a;\
+if(xchg)\
+    a= b;\
+b= t;
+
+    if(deblock_left){
+        for(i = (!deblock_top)<<1; i<34; i++){
+            XCHG(h->left_border[i     ], src_y [i*  linesize], temp8, xchg);
+        }
+    }
+
+    if(deblock_top){
+        XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
+        XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
+        XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
+        XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
+    }
+
+    if(!(s->flags&CODEC_FLAG_GRAY)){
+        if(deblock_left){
+            for(i = (!deblock_top) << 1; i<18; i++){
+                XCHG(h->left_border[i+34   ], src_cb[i*uvlinesize], temp8, xchg);
+                XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
+            }
+        }
+        if(deblock_top){
+            XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
+            XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
+            XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
+            XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
         }
     }
 }
@@ -2714,13 +2902,12 @@ static void hl_decode_mb(H264Context *h){
     uint8_t  *dest_y, *dest_cb, *dest_cr;
     int linesize, uvlinesize /*dct_offset*/;
     int i;
+    int *block_offset = &h->block_offset[0];
+    const unsigned int bottom = mb_y & 1;
 
     if(!s->decode)
         return;
 
-    if(s->mb_skiped){
-    }
-
     dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
     dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
     dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
@@ -2728,10 +2915,11 @@ static void hl_decode_mb(H264Context *h){
     if (h->mb_field_decoding_flag) {
         linesize = s->linesize * 2;
         uvlinesize = s->uvlinesize * 2;
+        block_offset = &h->block_offset[24];
         if(mb_y&1){ //FIXME move out of this func?
             dest_y -= s->linesize*15;
-            dest_cb-= s->linesize*7;
-            dest_cr-= s->linesize*7;
+            dest_cb-= s->uvlinesize*7;
+            dest_cr-= s->uvlinesize*7;
         }
     } else {
         linesize = s->linesize;
@@ -2739,116 +2927,195 @@ static void hl_decode_mb(H264Context *h){
 //        dct_offset = s->linesize * 16;
     }
 
-    if(IS_INTRA(mb_type)){
-        if(h->deblocking_filter)
-            xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
+    if (IS_INTRA_PCM(mb_type)) {
+        unsigned int x, y;
 
-        if(!(s->flags&CODEC_FLAG_GRAY)){
-            h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
-            h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
+        // The pixels are stored in the h->mb array in the same order as the levels;
+        // copy them to the output picture in the correct order.
+        for(i=0; i<16; i++) {
+            for (y=0; y<4; y++) {
+                for (x=0; x<4; x++) {
+                    *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
+                }
+            }
+        }
+        for(i=16; i<16+4; i++) {
+            for (y=0; y<4; y++) {
+                for (x=0; x<4; x++) {
+                    *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
+                }
+            }
         }
+        for(i=20; i<20+4; i++) {
+            for (y=0; y<4; y++) {
+                for (x=0; x<4; x++) {
+                    *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
+                }
+            }
+        }
+    } else {
+        if(IS_INTRA(mb_type)){
+            if(h->deblocking_filter) {
+                if (h->mb_aff_frame) {
+                    if (!bottom)
+                        xchg_pair_border(h, dest_y, dest_cb, dest_cr, s->linesize, s->uvlinesize, 1);
+                } else {
+                    xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
+                }
+            }
 
-        if(IS_INTRA4x4(mb_type)){
-            if(!s->encoding){
-                for(i=0; i<16; i++){
-                    uint8_t * const ptr= dest_y + h->block_offset[i];
-                    uint8_t *topright;
-                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
-                    int tr;
-
-                    if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
-                        const int topright_avail= (h->topright_samples_available<<i)&0x8000;
-                        assert(mb_y || linesize <= h->block_offset[i]);
-                        if(!topright_avail){
-                            tr= ptr[3 - linesize]*0x01010101;
-                            topright= (uint8_t*) &tr;
-                        }else if(i==5 && h->deblocking_filter){
-                            tr= *(uint32_t*)h->top_border[mb_x+1];
-                            topright= (uint8_t*) &tr;
-                        }else
-                            topright= ptr + 4 - linesize;
-                    }else
-                        topright= NULL;
+            if(!(s->flags&CODEC_FLAG_GRAY)){
+                h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
+                h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
+            }
 
-                    h->pred4x4[ dir ](ptr, topright, linesize);
-                    if(h->non_zero_count_cache[ scan8[i] ]){
-                        if(s->codec_id == CODEC_ID_H264)
-                            s->dsp.h264_idct_add(ptr, h->mb + i*16, linesize);
-                        else
-                            svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
+            if(IS_INTRA4x4(mb_type)){
+                if(!s->encoding){
+                    for(i=0; i<16; i++){
+                        uint8_t * const ptr= dest_y + block_offset[i];
+                        uint8_t *topright;
+                        const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
+                        int tr;
+
+                        if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
+                            const int topright_avail= (h->topright_samples_available<<i)&0x8000;
+                            assert(mb_y || linesize <= block_offset[i]);
+                            if(!topright_avail){
+                                tr= ptr[3 - linesize]*0x01010101;
+                                topright= (uint8_t*) &tr;
+                            }else if(i==5 && h->deblocking_filter){
+                                tr= *(uint32_t*)h->top_borders[h->mb_aff_frame ? IS_INTERLACED(mb_type) ? bottom : 1 : 0][mb_x+1];
+                                topright= (uint8_t*) &tr;
+                            }else
+                                topright= ptr + 4 - linesize;
+                        }else
+                            topright= NULL;
+
+                        h->pred4x4[ dir ](ptr, topright, linesize);
+                        if(h->non_zero_count_cache[ scan8[i] ]){
+                            if(s->codec_id == CODEC_ID_H264)
+                                s->dsp.h264_idct_add(ptr, h->mb + i*16, linesize);
+                            else
+                                svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
+                        }
                     }
                 }
+            }else{
+                h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
+                if(s->codec_id == CODEC_ID_H264)
+                    h264_luma_dc_dequant_idct_c(h->mb, s->qscale);
+                else
+                    svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
             }
-        }else{
-            h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
-            if(s->codec_id == CODEC_ID_H264)
-                h264_luma_dc_dequant_idct_c(h->mb, s->qscale);
-            else
-                svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
+            if(h->deblocking_filter) {
+                if (h->mb_aff_frame) {
+                    if (bottom) {
+                        uint8_t *pair_dest_y  = s->current_picture.data[0] + ((mb_y-1) * 16* s->linesize  ) + mb_x * 16;
+                        uint8_t *pair_dest_cb = s->current_picture.data[1] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
+                        uint8_t *pair_dest_cr = s->current_picture.data[2] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
+                        s->mb_y--;
+                        xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
+                        s->mb_y++;
+                    }
+                } else {
+                    xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
+                }
+            }
+        }else if(s->codec_id == CODEC_ID_H264){
+            hl_motion(h, dest_y, dest_cb, dest_cr,
+                      s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab, 
+                      s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab,
+                      s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
         }
-        if(h->deblocking_filter)
-            xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
-    }else if(s->codec_id == CODEC_ID_H264){
-        hl_motion(h, dest_y, dest_cb, dest_cr,
-                  s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab, 
-                  s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab,
-                  s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
-    }
 
 
-    if(!IS_INTRA4x4(mb_type)){
-        if(s->codec_id == CODEC_ID_H264){
-            for(i=0; i<16; i++){
-                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
-                    uint8_t * const ptr= dest_y + h->block_offset[i];
-                    s->dsp.h264_idct_add(ptr, h->mb + i*16, linesize);
+        if(!IS_INTRA4x4(mb_type)){
+            if(s->codec_id == CODEC_ID_H264){
+                for(i=0; i<16; i++){
+                    if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
+                        uint8_t * const ptr= dest_y + block_offset[i];
+                        s->dsp.h264_idct_add(ptr, h->mb + i*16, linesize);
+                    }
                 }
-            }
-        }else{
-            for(i=0; i<16; i++){
-                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
-                    uint8_t * const ptr= dest_y + h->block_offset[i];
-                    svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
+            }else{
+                for(i=0; i<16; i++){
+                    if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
+                        uint8_t * const ptr= dest_y + block_offset[i];
+                        svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
+                    }
                 }
             }
         }
-    }
 
-    if(!(s->flags&CODEC_FLAG_GRAY)){
-        chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp);
-        chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp);
-        if(s->codec_id == CODEC_ID_H264){
-            for(i=16; i<16+4; i++){
-                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
-                    uint8_t * const ptr= dest_cb + h->block_offset[i];
-                    s->dsp.h264_idct_add(ptr, h->mb + i*16, uvlinesize);
+        if(!(s->flags&CODEC_FLAG_GRAY)){
+            chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp);
+            chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp);
+            if(s->codec_id == CODEC_ID_H264){
+                for(i=16; i<16+4; i++){
+                    if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
+                        uint8_t * const ptr= dest_cb + block_offset[i];
+                        s->dsp.h264_idct_add(ptr, h->mb + i*16, uvlinesize);
+                    }
                 }
-            }
-            for(i=20; i<20+4; i++){
-                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
-                    uint8_t * const ptr= dest_cr + h->block_offset[i];
-                    s->dsp.h264_idct_add(ptr, h->mb + i*16, uvlinesize);
+                for(i=20; i<20+4; i++){
+                    if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
+                        uint8_t * const ptr= dest_cr + block_offset[i];
+                        s->dsp.h264_idct_add(ptr, h->mb + i*16, uvlinesize);
+                    }
                 }
-            }
-        }else{
-            for(i=16; i<16+4; i++){
-                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
-                    uint8_t * const ptr= dest_cb + h->block_offset[i];
-                    svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
+            }else{
+                for(i=16; i<16+4; i++){
+                    if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
+                        uint8_t * const ptr= dest_cb + block_offset[i];
+                        svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
+                    }
                 }
-            }
-            for(i=20; i<20+4; i++){
-                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
-                    uint8_t * const ptr= dest_cr + h->block_offset[i];
-                    svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
+                for(i=20; i<20+4; i++){
+                    if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
+                        uint8_t * const ptr= dest_cr + block_offset[i];
+                        svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
+                    }
                 }
             }
         }
     }
     if(h->deblocking_filter) {
-        backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
-        fill_caches(h, mb_type, 1); //FIXME dont fill stuff which isnt used by filter_mb
-        filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr);
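+        // In an MBAFF frame the loop filter runs once per macroblock pair:
+        // nothing is done for the top macroblock, and both macroblocks are
+        // filtered once the bottom one of the pair has been decoded.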
+        if (h->mb_aff_frame) {
+            const int mb_y = s->mb_y - 1;
+            uint8_t  *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
+            const int mb_xy= mb_x + mb_y*s->mb_stride;
+            const int mb_type_top   = s->current_picture.mb_type[mb_xy];
+            const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
+            uint8_t tmp = s->current_picture.data[1][384];
+            if (!bottom) return;
+            pair_dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
+            pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
+            pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
+
+            backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
+            // TODO deblock a pair
+            // top 
+            s->mb_y--;
+            tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
+            fill_caches(h, mb_type_top, 1); //FIXME dont fill stuff which isnt used by filter_mb
+            filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
+            if (tmp != s->current_picture.data[1][384]) {
+                tprintf("modified pixel 8,1 (1)\n");
+            }
+            // bottom
+            s->mb_y++;
+            tprintf("call mbaff filter_mb\n");
+            fill_caches(h, mb_type_bottom, 1); //FIXME dont fill stuff which isnt used by filter_mb
+            filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
+            if (tmp != s->current_picture.data[1][384]) {
+                tprintf("modified pixel 8,1 (2)\n");
+            }
+        } else {
+            tprintf("call filter_mb\n");
+            backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
+            fill_caches(h, mb_type, 1); //FIXME dont fill stuff which isnt used by filter_mb
+            filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
+        }
     }
 }
 
@@ -2859,7 +3126,7 @@ static int fill_default_ref_list(H264Context *h){
     MpegEncContext * const s = &h->s;
     int i;
     int smallest_poc_greater_than_current = -1;
-    Picture sorted_short_ref[16];
+    Picture sorted_short_ref[32];
     
     if(h->slice_type==B_TYPE){
         int out_i;
@@ -2933,12 +3200,12 @@ static int fill_default_ref_list(H264Context *h){
             }
         }else{
             int index=0;
-            for(i=0; i<h->short_ref_count && index < h->ref_count[0]; i++){
+            for(i=0; i<h->short_ref_count; i++){
                 if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit
                 h->default_ref_list[0][index  ]= *h->short_ref[i];
                 h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
             }
-            for(i = 0; i < 16 && index < h->ref_count[0]; i++){
+            for(i = 0; i < 16; i++){
                 if(h->long_ref[i] == NULL) continue;
                 if(h->long_ref[i]->reference != 3) continue;
                 h->default_ref_list[0][index  ]= *h->long_ref[i];
@@ -2988,6 +3255,7 @@ static int decode_ref_pic_list_reordering(H264Context *h){
                 int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
                 int pic_id;
                 int i;
+                Picture *ref = NULL;
                 
                 if(reordering_of_pic_nums_idc==3) 
                     break;
@@ -3010,31 +3278,21 @@ static int decode_ref_pic_list_reordering(H264Context *h){
                         else                                pred+= abs_diff_pic_num;
                         pred &= h->max_pic_num - 1;
                     
-                        for(i= h->ref_count[list]-1; i>=0; i--){
-                            if(h->ref_list[list][i].data[0] != NULL && h->ref_list[list][i].pic_id == pred && h->ref_list[list][i].long_ref==0) // ignore non existing pictures by testing data[0] pointer
+                        for(i= h->short_ref_count-1; i>=0; i--){
+                            ref = h->short_ref[i];
+                            if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
                                 break;
                         }
                     }else{
                         pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
-
-                        for(i= h->ref_count[list]-1; i>=0; i--){
-                            if(h->ref_list[list][i].pic_id == pic_id && h->ref_list[list][i].long_ref==1) // no need to ignore non existing pictures as non existing pictures have long_ref==0
-                                break;
-                        }
+                        ref = h->long_ref[pic_id];
                     }
 
                     if (i < 0) {
                         av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
                         memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
-                    } else if (i != index) /* this test is not necessary, it is only an optimisation to skip double copy of Picture structure in this case */ {
-                        Picture tmp= h->ref_list[list][i];
-                        if (i < index) {
-                            i = h->ref_count[list];
-                        }
-                        for(; i > index; i--){
-                            h->ref_list[list][i]= h->ref_list[list][i-1];
-                        }
-                        h->ref_list[list][index]= tmp;
+                    } else {
+                        h->ref_list[list][index]= *ref;
                     }
                 }else{
                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
@@ -3048,6 +3306,7 @@ static int decode_ref_pic_list_reordering(H264Context *h){
     
     if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
         direct_dist_scale_factor(h);
+    direct_ref_list_init(h);
     return 0;    
 }
 
@@ -3124,7 +3383,7 @@ static void implicit_weight_table(H264Context *h){
     for(ref0=0; ref0 < h->ref_count[0]; ref0++){
         int poc0 = h->ref_list[0][ref0].poc;
         for(ref1=0; ref1 < h->ref_count[1]; ref1++){
-            int poc1 = h->ref_list[0][ref1].poc;
+            int poc1 = h->ref_list[1][ref1].poc;
             int td = clip(poc1 - poc0, -128, 127);
             if(td){
                 int tb = clip(cur_poc - poc0, -128, 127);
@@ -3140,36 +3399,40 @@ static void implicit_weight_table(H264Context *h){
     }
 }
 
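+/**
+ * Clears the reference flag of a picture, unless it is still pending output
+ * (delayed_output_pic or an entry of delayed_pic[]), in which case the flag is
+ * kept so the frame buffer is not reused before the picture has been output.
+ */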
+static inline void unreference_pic(H264Context *h, Picture *pic){
+    int i;
+    pic->reference=0;
+    if(pic == h->delayed_output_pic)
+        pic->reference=1;
+    else{
+        for(i = 0; h->delayed_pic[i]; i++)
+            if(pic == h->delayed_pic[i]){
+                pic->reference=1;
+                break;
+            }
+    }
+}
+
 /**
  * instantaneous decoder refresh.
  */
 static void idr(H264Context *h){
-    int i,j;
-
-#define CHECK_DELAY(pic) \
-    for(j = 0; h->delayed_pic[j]; j++) \
-        if(pic == h->delayed_pic[j]){ \
-            pic->reference=1; \
-            break; \
-        }
+    int i;
 
     for(i=0; i<16; i++){
         if (h->long_ref[i] != NULL) {
-            h->long_ref[i]->reference=0;
-            CHECK_DELAY(h->long_ref[i]);
+            unreference_pic(h, h->long_ref[i]);
             h->long_ref[i]= NULL;
         }
     }
     h->long_ref_count=0;
 
     for(i=0; i<h->short_ref_count; i++){
-        h->short_ref[i]->reference=0;
-        CHECK_DELAY(h->short_ref[i]);
+        unreference_pic(h, h->short_ref[i]);
         h->short_ref[i]= NULL;
     }
     h->short_ref_count=0;
 }
-#undef CHECK_DELAY
 
 /**
  *
@@ -3260,11 +3523,11 @@ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
         case MMCO_SHORT2UNUSED:
             pic= remove_short(h, mmco[i].short_frame_num);
             if(pic==NULL) return -1;
-            pic->reference= 0;
+            unreference_pic(h, pic);
             break;
         case MMCO_SHORT2LONG:
             pic= remove_long(h, mmco[i].long_index);
-            if(pic) pic->reference=0;
+            if(pic) unreference_pic(h, pic);
             
             h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
             h->long_ref[ mmco[i].long_index ]->long_ref=1;
@@ -3273,11 +3536,11 @@ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
         case MMCO_LONG2UNUSED:
             pic= remove_long(h, mmco[i].long_index);
             if(pic==NULL) return -1;
-            pic->reference= 0;
+            unreference_pic(h, pic);
             break;
         case MMCO_LONG:
             pic= remove_long(h, mmco[i].long_index);
-            if(pic) pic->reference=0;
+            if(pic) unreference_pic(h, pic);
             
             h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
             h->long_ref[ mmco[i].long_index ]->long_ref=1;
@@ -3290,17 +3553,17 @@ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
             // just remove the long term which index is greater than new max
             for(j = mmco[i].long_index; j<16; j++){
                 pic = remove_long(h, j);
-                if (pic) pic->reference=0;
+                if (pic) unreference_pic(h, pic);
             }
             break;
         case MMCO_RESET:
             while(h->short_ref_count){
                 pic= remove_short(h, h->short_ref[0]->frame_num);
-                pic->reference=0;
+                unreference_pic(h, pic);
             }
             for(j = 0; j < 16; j++) {
                 pic= remove_long(h, j);
-                if(pic) pic->reference=0;
+                if(pic) unreference_pic(h, pic);
             }
             break;
         default: assert(0);
@@ -3310,7 +3573,7 @@ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
     if(!current_is_long){
         pic= remove_short(h, s->current_picture_ptr->frame_num);
         if(pic){
-            pic->reference=0;
+            unreference_pic(h, pic);
             av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
         }
         
@@ -3481,6 +3744,7 @@ static int decode_slice_header(H264Context *h){
     int default_ref_list_done = 0;
 
     s->current_picture.reference= h->nal_ref_idc != 0;
+    s->dropable= h->nal_ref_idc == 0;
 
     first_mb_in_slice= get_ue_golomb(&s->gb);
 
@@ -3522,14 +3786,11 @@ static int decode_slice_header(H264Context *h){
     }
     
     s->mb_width= h->sps.mb_width;
-    s->mb_height= h->sps.mb_height;
+    s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
     
     h->b_stride=  s->mb_width*4 + 1;
     h->b8_stride= s->mb_width*2 + 1;
 
-    s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
-    s->resync_mb_y = s->mb_y = first_mb_in_slice / s->mb_width; //FIXME AFFW
-    
     s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
     if(h->sps.frame_mbs_only_flag)
         s->height= 16*s->mb_height - 2*(h->sps.crop_top  + h->sps.crop_bottom);
@@ -3566,15 +3827,22 @@ static int decode_slice_header(H264Context *h){
     s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
     h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
 
+    h->mb_aff_frame = 0;
     if(h->sps.frame_mbs_only_flag){
         s->picture_structure= PICT_FRAME;
     }else{
-        if(get_bits1(&s->gb)) //field_pic_flag
+        if(get_bits1(&s->gb)) { //field_pic_flag
             s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
-        else
+        } else {
             s->picture_structure= PICT_FRAME;
+            first_mb_in_slice <<= 1;
+            h->mb_aff_frame = h->sps.mb_aff;
+        }
     }
 
+    s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
+    s->resync_mb_y = s->mb_y = first_mb_in_slice / s->mb_width;
+    
     if(s->picture_structure==PICT_FRAME){
         h->curr_pic_num=   h->frame_num;
         h->max_pic_num= 1<< h->sps.log2_max_frame_num;
@@ -3656,7 +3924,7 @@ static int decode_slice_header(H264Context *h){
         av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
         return -1;
     }
-    h->chroma_qp = get_chroma_qp(h, s->qscale);
+    h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
     //FIXME qscale / qp ... stuff
     if(h->slice_type == SP_TYPE){
         get_bits1(&s->gb); /* sp_for_switch_flag */
@@ -3687,8 +3955,10 @@ static int decode_slice_header(H264Context *h){
     h->slice_num++;
 
     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
-        av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d weight:%d%s\n", 
-               h->slice_num, first_mb_in_slice, 
+        av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d weight:%d%s\n", 
+               h->slice_num,
+               (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
+               first_mb_in_slice, 
                av_get_pict_type_char(h->slice_type),
                pps_id, h->frame_num,
                s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
@@ -3877,11 +4147,16 @@ static void decode_mb_skip(H264Context *h){
     memset(h->non_zero_count[mb_xy], 0, 16);
     memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
 
+    if(h->mb_aff_frame && s->mb_skip_run==0 && (s->mb_y&1)==0){
+        h->mb_field_decoding_flag= get_bits1(&s->gb);
+    }
+    if(h->mb_field_decoding_flag)
+        mb_type|= MB_TYPE_INTERLACED;
+        
     if( h->slice_type == B_TYPE )
     {
         // just for fill_caches. pred_direct_motion will set the real mb_type
         mb_type= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
-        //FIXME mbaff
 
         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
         pred_direct_motion(h, &mb_type);
@@ -3895,12 +4170,6 @@ static void decode_mb_skip(H264Context *h){
         int mx, my;
         mb_type= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
 
-        if(h->sps.mb_aff && s->mb_skip_run==0 && (s->mb_y&1)==0){
-            h->mb_field_decoding_flag= get_bits1(&s->gb);
-        }
-        if(h->mb_field_decoding_flag)
-            mb_type|= MB_TYPE_INTERLACED;
-        
         fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
         pred_pskip_motion(h, &mx, &my);
         fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
@@ -3939,11 +4208,11 @@ static int decode_mb_cavlc(H264Context *h){
             return 0;
         }
     }
-    if(h->sps.mb_aff /* && !field pic FIXME needed? */){
-        if((s->mb_y&1)==0)
+    if(h->mb_aff_frame){
+        if ( ((s->mb_y&1) == 0) || h->prev_mb_skiped)
             h->mb_field_decoding_flag = get_bits1(&s->gb);
     }else
-        h->mb_field_decoding_flag=0; //FIXME som ed note ?!
+        h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
     
     h->prev_mb_skiped= 0;
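A hedged stand-alone sketch of when mb_field_decoding_flag is actually read, following the logic above: in an MBAFF frame it is coded for the top macroblock of each pair (or for the first non-skipped macroblock after a skip run); otherwise it is derived from the picture structure. The function below is only an illustration, not decoder API.

#include <stdio.h>

static int field_flag_is_coded(int mb_aff_frame, int mb_y, int prev_mb_skipped)
{
    if (!mb_aff_frame)
        return 0;                     /* derived from picture_structure instead */
    return ((mb_y & 1) == 0)          /* top macroblock of an MB pair           */
        || prev_mb_skipped;           /* or first coded MB after skipped ones   */
}

int main(void)
{
    printf("%d %d %d\n",
           field_flag_is_coded(1, 4, 0),   /* top MB of a pair   -> 1 */
           field_flag_is_coded(1, 5, 0),   /* bottom MB          -> 0 */
           field_flag_is_coded(0, 4, 0));  /* not an MBAFF frame -> 0 */
    return 0;
}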
     
@@ -3984,38 +4253,39 @@ decode_intra_mb:
     h->slice_table[ mb_xy ]= h->slice_num;
     
     if(IS_INTRA_PCM(mb_type)){
-        const uint8_t *ptr;
-        int x, y;
+        unsigned int x, y;
         
         // we assume these blocks are very rare so we dont optimize it
         align_get_bits(&s->gb);
         
-        ptr= s->gb.buffer + get_bits_count(&s->gb);
-    
+        // The pixels are stored in the same order as the levels in the h->mb array.
         for(y=0; y<16; y++){
-            const int index= 4*(y&3) + 64*(y>>2);
+            const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
             for(x=0; x<16; x++){
-                h->mb[index + (x&3) + 16*(x>>2)]= *(ptr++);
+                tprintf("LUMA IPCM LEVEL (%3d)\n", show_bits(&s->gb, 8));
+                h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
             }
         }
         for(y=0; y<8; y++){
             const int index= 256 + 4*(y&3) + 32*(y>>2);
             for(x=0; x<8; x++){
-                h->mb[index + (x&3) + 16*(x>>2)]= *(ptr++);
+                tprintf("CHROMA U IPCM LEVEL (%3d)\n", show_bits(&s->gb, 8));
+                h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
             }
         }
         for(y=0; y<8; y++){
             const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
             for(x=0; x<8; x++){
-                h->mb[index + (x&3) + 16*(x>>2)]= *(ptr++);
+                tprintf("CHROMA V IPCM LEVEL (%3d)\n", show_bits(&s->gb, 8));
+                h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
             }
         }
     
-        skip_bits(&s->gb, 384); //FIXME check /fix the bitstream readers
-        
-        //FIXME deblock filter, non_zero_count_cache init ...
+        // In deblocking, the quantiser is 0
+        s->current_picture.qscale_table[mb_xy]= 0;
+        h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
+        // All coeffs are present
         memset(h->non_zero_count[mb_xy], 16, 16);
-        s->current_picture.qscale_table[mb_xy]= s->qscale;
         
         return 0;
     }
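The index arithmetic above writes the 256 raw luma samples into h->mb as sixteen contiguous 4x4 blocks, following the decoder's 4x4 block scan order. The small self-contained check below only reuses that arithmetic to show it is a permutation of 0..255; nothing in it is decoder API.

#include <stdio.h>

static int ipcm_luma_offset(int x, int y)
{
    return 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3)
         +   (x&3) + 16*((x>>2)&1) +  64*(x>>3);
}

int main(void)
{
    unsigned char hit[256] = {0};
    int x, y, collisions = 0;
    for (y = 0; y < 16; y++)
        for (x = 0; x < 16; x++)
            if (hit[ipcm_luma_offset(x, y)]++)
                collisions++;
    printf("collisions: %d\n", collisions);  /* prints 0: every sample gets its own slot */
    return 0;
}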
@@ -4159,7 +4429,8 @@ decode_intra_mb:
                     if(IS_DIR(mb_type, 0, list)){
                         const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
                         fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
-                    }
+                    }else
+                        fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1);
                 }
             }
             for(list=0; list<2; list++){
@@ -4170,7 +4441,8 @@ decode_intra_mb:
                     tprintf("final mv:%d %d\n", mx, my);
 
                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
-                }
+                }else
+                    fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
             }
         }
         else if(IS_16X8(mb_type)){
@@ -4180,7 +4452,7 @@ decode_intra_mb:
                         if(IS_DIR(mb_type, i, list)){
                             const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
-                        }else // needed only for mixed refs (e.g. B_L0_L1_16x8)
+                        }else
                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
                     }
                 }
@@ -4206,7 +4478,7 @@ decode_intra_mb:
                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
                             const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
-                        }else // needed only for mixed refs
+                        }else
                             fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
                     }
                 }
@@ -4272,7 +4544,7 @@ decode_intra_mb:
             else            s->qscale-= 52;
         }
         
-        h->chroma_qp= chroma_qp= get_chroma_qp(h, s->qscale);
+        h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
         if(IS_INTRA16x16(mb_type)){
             if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, s->qscale, 16) < 0){
                 return -1; //FIXME continue if partotioned and other retirn -1 too
@@ -4560,6 +4832,7 @@ static int decode_cabac_mb_cbp_luma( H264Context *h) {
         }
 
         /* No need to test for skip as we put 0 for skip block */
+        /* No need to test for IPCM as we put 1 for IPCM block */
         if( mba_xy >= 0 ) {
             int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
             if( ((h->cbp_table[mba_xy] >> i8x8a)&0x01) == 0 )
@@ -4919,13 +5192,49 @@ decode_intra_mb:
     h->slice_table[ mb_xy ]= h->slice_num;
 
     if(IS_INTRA_PCM(mb_type)) {
-        /* TODO */
-        assert(0);
-        h->cbp_table[mb_xy] = 0xf +4*2; //FIXME ?!
-        h->cbp_table[mb_xy] |= 0x1C0;
+        const uint8_t *ptr;
+        unsigned int x, y;
+        
+        // We assume these blocks are very rare so we don't optimize them.
+        // FIXME The following two lines get the bitstream position in the CABAC
+        // decoder; I think it should be done by a function in cabac.h (or cabac.c).
+        ptr= h->cabac.bytestream;
+        if (h->cabac.low&0x1) ptr-=CABAC_BITS/8;
+
+        // The pixels are stored in the same order as the levels in the h->mb array.
+        for(y=0; y<16; y++){
+            const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
+            for(x=0; x<16; x++){
+                tprintf("LUMA IPCM LEVEL (%3d)\n", *ptr);
+                h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
+            }
+        }
+        for(y=0; y<8; y++){
+            const int index= 256 + 4*(y&3) + 32*(y>>2);
+            for(x=0; x<8; x++){
+                tprintf("CHROMA U IPCM LEVEL (%3d)\n", *ptr);
+                h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
+            }
+        }
+        for(y=0; y<8; y++){
+            const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
+            for(x=0; x<8; x++){
+                tprintf("CHROMA V IPCM LEVEL (%3d)\n", *ptr);
+                h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
+            }
+        }
+
+        ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
+
+        // All blocks are present
+        h->cbp_table[mb_xy] = 0x1ef;
         h->chroma_pred_mode_table[mb_xy] = 0;
-        s->current_picture.qscale_table[mb_xy]= s->qscale;
-        return -1;
+        // In deblocking, the quantiser is 0
+        s->current_picture.qscale_table[mb_xy]= 0;
+        h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
+        // All coeffs are present
+        memset(h->non_zero_count[mb_xy], 16, 16);
+        return 0;
     }
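A much-simplified sketch of the pattern the CABAC IPCM branch above follows: drop the arithmetic decoder at its current byte position, copy the 384 raw sample bytes, and re-initialise the decoder on the byte after them. The struct below is a toy stand-in rather than the real CABACContext, and it ignores the (low & 1) adjustment the real code needs for a partially consumed byte.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

struct toy_cabac {                    /* toy stand-in, not CABACContext       */
    const uint8_t *bytestream;        /* next byte the decoder would consume  */
    const uint8_t *bytestream_end;
};

static void toy_cabac_init(struct toy_cabac *c, const uint8_t *buf, size_t size)
{
    c->bytestream     = buf;
    c->bytestream_end = buf + size;
}

/* Copy the 256 luma + 2*64 chroma raw bytes, then restart the decoder after them. */
static void toy_read_ipcm(struct toy_cabac *c, uint8_t samples[384])
{
    const uint8_t *ptr = c->bytestream;
    memcpy(samples, ptr, 384);
    ptr += 384;
    toy_cabac_init(c, ptr, (size_t)(c->bytestream_end - ptr));
}

int main(void)
{
    static uint8_t slice[512];
    uint8_t samples[384];
    struct toy_cabac c;
    toy_cabac_init(&c, slice, sizeof(slice));
    toy_read_ipcm(&c, samples);
    return c.bytestream == slice + 384 ? 0 : 1;
}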
 
     fill_caches(h, mb_type, 0);
@@ -5070,7 +5379,8 @@ decode_intra_mb:
                         const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
                         fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
                     }
-                }
+                }else
+                    fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
             }
             for(list=0; list<2; list++){
                 if(IS_DIR(mb_type, 0, list)){
@@ -5082,7 +5392,8 @@ decode_intra_mb:
 
                     fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
-                }
+                }else
+                    fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
             }
         }
         else if(IS_16X8(mb_type)){
@@ -5107,7 +5418,7 @@ decode_intra_mb:
 
                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
                         fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
-                    }else{ // needed only for mixed refs
+                    }else{
                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
                     }
@@ -5136,7 +5447,7 @@ decode_intra_mb:
                         tprintf("final mv:%d %d\n", mx, my);
                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
                         fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
-                    }else{ // needed only for mixed refs
+                    }else{
                         fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
                     }
@@ -5175,7 +5486,7 @@ decode_intra_mb:
             if(s->qscale<0) s->qscale+= 52;
             else            s->qscale-= 52;
         }
-        h->chroma_qp = get_chroma_qp(h, s->qscale);
+        h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
 
         if( IS_INTRA16x16( mb_type ) ) {
             int i;
@@ -5287,6 +5598,7 @@ static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4]
                     i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
                     pix[-1] = clip_uint8( p0 + i_delta );    /* p0' */
                     pix[0]  = clip_uint8( q0 - i_delta );    /* q0' */
+                    tprintf("filter_mb_edgev i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, tc, bS[i], pix[-3], p1, p0, q0, q1, pix[2], pix[-2], pix[-1], pix[0], pix[1]);
                 }
                 pix += stride;
             }
@@ -5333,6 +5645,7 @@ static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4]
                         pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                         pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
                     }
+                    tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
                 }
                 pix += stride;
             }
@@ -5367,7 +5680,7 @@ static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4
 
                     pix[-1] = clip_uint8( p0 + i_delta );    /* p0' */
                     pix[0]  = clip_uint8( q0 - i_delta );    /* q0' */
-                    //tprintf("filter_mb_edgecv i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, tc, bS[i], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
+                    tprintf("filter_mb_edgecv i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, tc, bS[i], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
                 }
                 pix += stride;
             }
@@ -5385,7 +5698,7 @@ static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4
 
                     pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
                     pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
-                    //tprintf("filter_mb_edgecv i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
+                    tprintf("filter_mb_edgecv i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
                 }
                 pix += stride;
             }
@@ -5393,6 +5706,160 @@ static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4
     }
 }
 
+static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) {
+    int i;
+    for( i = 0; i < 16; i++, pix += stride) {
+        int index_a;
+        int alpha;
+        int beta;
+    
+        int qp_index;
+        int bS_index = (i >> 1);
+        if (h->mb_field_decoding_flag) {
+            bS_index &= ~1;
+            bS_index |= (i & 1);
+        }
+
+        if( bS[bS_index] == 0 ) {
+            continue;
+        }
+
+        qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
+        index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
+        alpha = alpha_table[index_a];
+        beta  = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
+
+
+        if( bS[bS_index] < 4 ) {
+            const int tc0 = tc0_table[index_a][bS[bS_index] - 1];
+            /* 4px edge length */
+            const int p0 = pix[-1];
+            const int p1 = pix[-2];
+            const int p2 = pix[-3];
+            const int q0 = pix[0];
+            const int q1 = pix[1];
+            const int q2 = pix[2];
+
+            if( ABS( p0 - q0 ) < alpha &&
+                ABS( p1 - p0 ) < beta &&
+                ABS( q1 - q0 ) < beta ) {
+                int tc = tc0;
+                int i_delta;
+
+                if( ABS( p2 - p0 ) < beta ) {
+                    pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
+                    tc++;
+                }
+                if( ABS( q2 - q0 ) < beta ) {
+                    pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
+                    tc++;
+                }
+
+                i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
+                pix[-1] = clip_uint8( p0 + i_delta );    /* p0' */
+                pix[0]  = clip_uint8( q0 - i_delta );    /* q0' */
+                tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
+            }
+        }else{
+            /* 4px edge length */
+            const int p0 = pix[-1];
+            const int p1 = pix[-2];
+            const int p2 = pix[-3];
+
+            const int q0 = pix[0];
+            const int q1 = pix[1];
+            const int q2 = pix[2];
+
+            if( ABS( p0 - q0 ) < alpha &&
+                ABS( p1 - p0 ) < beta &&
+                ABS( q1 - q0 ) < beta ) {
+
+                if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
+                    if( ABS( p2 - p0 ) < beta)
+                    {
+                        const int p3 = pix[-4];
+                        /* p0', p1', p2' */
+                        pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
+                        pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
+                        pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
+                    } else {
+                        /* p0' */
+                        pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
+                    }
+                    if( ABS( q2 - q0 ) < beta)
+                    {
+                        const int q3 = pix[3];
+                        /* q0', q1', q2' */
+                        pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
+                        pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
+                        pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
+                    } else {
+                        /* q0' */
+                        pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+                    }
+                }else{
+                    /* p0', q0' */
+                    pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
+                    pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+                }
+                tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
+            }
+        }
+    }
+}
+static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp[2] ) {
+    int i;
+    for( i = 0; i < 8; i++, pix += stride) {
+        int index_a;
+        int alpha;
+        int beta;
+
+        int qp_index;
+        int bS_index = i;
+
+        if( bS[bS_index] == 0 ) {
+            continue;
+        }
+
+        qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
+        index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
+        alpha = alpha_table[index_a];
+        beta  = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
+        if( bS[bS_index] < 4 ) {
+            const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1;
+            /* 2px edge length (because we use the same bS as for luma) */
+            const int p0 = pix[-1];
+            const int p1 = pix[-2];
+            const int q0 = pix[0];
+            const int q1 = pix[1];
+
+            if( ABS( p0 - q0 ) < alpha &&
+                ABS( p1 - p0 ) < beta &&
+                ABS( q1 - q0 ) < beta ) {
+                const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
+
+                pix[-1] = clip_uint8( p0 + i_delta );    /* p0' */
+                pix[0]  = clip_uint8( q0 - i_delta );    /* q0' */
+                tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
+            }
+        }else{
+            const int p0 = pix[-1];
+            const int p1 = pix[-2];
+            const int q0 = pix[0];
+            const int q1 = pix[1];
+
+            if( ABS( p0 - q0 ) < alpha &&
+                ABS( p1 - p0 ) < beta &&
+                ABS( q1 - q0 ) < beta ) {
+
+                pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
+                pix[0]  = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
+                tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
+            }
+        }
+    }
+}
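For reference, here is a self-contained version of the bS<4 kernel that the luma edge filters in this file apply to each line of samples across an edge; it is meant only to make the clipping structure easier to follow, using plain ints instead of the decoder's pixel pointers and tables.

#include <stdio.h>
#include <stdlib.h>

static int clip3(int x, int lo, int hi) { return x < lo ? lo : x > hi ? hi : x; }

/* p[0..2] = p0,p1,p2 on one side of the edge, q[0..2] = q0,q1,q2 on the other. */
static void weak_filter_line(int p[3], int q[3], int alpha, int beta, int tc0)
{
    const int p0 = p[0], p1 = p[1], p2 = p[2];
    const int q0 = q[0], q1 = q[1], q2 = q[2];
    int tc = tc0, delta;

    if (abs(p0 - q0) >= alpha || abs(p1 - p0) >= beta || abs(q1 - q0) >= beta)
        return;                              /* looks like a real edge: keep it */
    if (abs(p2 - p0) < beta) {               /* smooth p side: also correct p1  */
        p[1] = p1 + clip3((p2 + ((p0 + q0 + 1) >> 1) - 2*p1) >> 1, -tc0, tc0);
        tc++;
    }
    if (abs(q2 - q0) < beta) {               /* smooth q side: also correct q1  */
        q[1] = q1 + clip3((q2 + ((p0 + q0 + 1) >> 1) - 2*q1) >> 1, -tc0, tc0);
        tc++;
    }
    delta = clip3((((q0 - p0) << 2) + (p1 - q1) + 4) >> 3, -tc, tc);
    p[0]  = clip3(p0 + delta, 0, 255);       /* p0' */
    q[0]  = clip3(q0 - delta, 0, 255);       /* q0' */
}

int main(void)
{
    int p[3] = { 80, 82, 83 }, q[3] = { 70, 69, 68 };
    weak_filter_line(p, q, 15, 6, 4);
    printf("p0'=%d q0'=%d\n", p[0], q[0]);
    return 0;
}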
+
 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
     int i, d;
     const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
@@ -5436,6 +5903,7 @@ static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4]
                     i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
                     pix[-pix_next] = clip_uint8( p0 + i_delta );    /* p0' */
                     pix[0]         = clip_uint8( q0 - i_delta );    /* q0' */
+                    tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, tc, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
                 }
                 pix++;
             }
@@ -5480,6 +5948,7 @@ static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4]
                         pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                         pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
                     }
+                    tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
                 }
                 pix++;
             }
@@ -5518,6 +5987,7 @@ static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4
 
                     pix[-pix_next] = clip_uint8( p0 + i_delta );    /* p0' */
                     pix[0]         = clip_uint8( q0 - i_delta );    /* q0' */
+                    tprintf("filter_mb_edgech i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, tc, bS[i], pix[-3*pix_next], p1, p0, q0, q1, pix[2*pix_next], pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
                 }
                 pix++;
             }
@@ -5535,6 +6005,7 @@ static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4
 
                     pix[-pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
                     pix[0]         = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
+                    tprintf("filter_mb_edgech i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], pix[-3*pix_next], p1, p0, q0, q1, pix[2*pix_next], pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
                 }
                 pix++;
             }
@@ -5542,39 +6013,167 @@ static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4
     }
 }
 
-static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr) {
+static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
     MpegEncContext * const s = &h->s;
     const int mb_xy= mb_x + mb_y*s->mb_stride;
-    int linesize, uvlinesize;
+    int first_vertical_edge_done = 0;
     int dir;
 
-    /* FIXME Implement deblocking filter for field MB */
-    if( h->sps.mb_aff ) {
-        return;
-    }
-    linesize = s->linesize;
-    uvlinesize = s->uvlinesize;
+    if (h->mb_aff_frame
+            // left mb is in picture
+            && h->slice_table[mb_xy-1] != 255
+            // and current and left pair do not have the same interlaced type
+            && (IS_INTERLACED(s->current_picture.mb_type[mb_xy]) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
+            // and left mb is in the same slice if deblocking_filter == 2
+            && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
+        /* The first vertical edge is different in MBAFF frames:
+         * there are 8 different bS values to compute and 2 different QPs.
+         */
+        int bS[8];
+        int qp[2];
+        int chroma_qp[2];
 
+        int i;
+        first_vertical_edge_done = 1;
+        for( i = 0; i < 8; i++ ) {
+            int y = i>>1;
+            int b_idx= 8 + 4 + 8*y;
+            int bn_idx= b_idx - 1;
+
+            int mbn_xy = h->mb_field_decoding_flag ? h->left_mb_xy[i>>2] : h->left_mb_xy[i&1];
+
+            if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
+                IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
+                bS[i] = 4;
+            } else if( h->non_zero_count_cache[b_idx] != 0 ||
+                h->non_zero_count_cache[bn_idx] != 0 ) {
+                bS[i] = 2;
+            } else {
+                /* FIXME: A given frame may occupy more than one position in
+                 * the reference list. So we should compare the frame numbers,
+                 * not the indices in the ref list. */
+                int l;
+                bS[i] = 0;
+                for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
+                    if( h->ref_cache[l][b_idx] != h->ref_cache[l][bn_idx] ||
+                        ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
+                        ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {
+                        bS[i] = 1;
+                        break;
+                    }
+                }
+            }
+        }
+        if(bS[0]+bS[1]+bS[2]+bS[3] != 0) {
+            // Do not use s->qscale as the luma quantiser because it does not
+            // have the same value in IPCM macroblocks.
+            qp[0] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[0]] + 1 ) >> 1;
+            chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) +
+                             get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[0]] ) + 1 ) >> 1;
+            qp[1] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[1]] + 1 ) >> 1;
+            chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) +
+                             get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[1]] ) + 1 ) >> 1;
+
+            /* Filter edge */
+            tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
+            { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
+            filter_mb_mbaff_edgev ( h, &img_y [0], linesize,   bS, qp );
+            filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
+            filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
+        }
+    }
     /* dir : 0 -> vertical edge, 1 -> horizontal edge */
     for( dir = 0; dir < 2; dir++ )
     {
         int edge;
-        const int mbm_xy = dir == 0 ? mb_xy -1 : mb_xy - s->mb_stride;
+        const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
         int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
 
+        if (first_vertical_edge_done) {
+            start = 1;
+            first_vertical_edge_done = 0;
+        }
+
         if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
             start = 1;
 
         /* Calculate bS */
         for( edge = start; edge < 4; edge++ ) {
-            /* mbn_xy: neighbour macroblock (how that works for field ?) */
+            /* mbn_xy: neighbour macroblock */
             int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
             int bS[4];
             int qp;
 
+            if (h->mb_aff_frame && (dir == 1) && (edge == 0) && ((mb_y & 1) == 0)
+                && !IS_INTERLACED(s->current_picture.mb_type[mb_xy])
+                && IS_INTERLACED(s->current_picture.mb_type[mbn_xy])
+                ) {
+                // This is a special case in the standard where the filtering
+                // must be done twice (once for each field) even though the
+                // current macroblock is a frame macroblock.
+                //
+                unsigned int tmp_linesize   = 2 *   linesize;
+                unsigned int tmp_uvlinesize = 2 * uvlinesize;
+                int mbn_xy = mb_xy - 2 * s->mb_stride;
+                int qp, chroma_qp;
+
+                // first filtering
+                if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
+                    IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
+                    bS[0] = bS[1] = bS[2] = bS[3] = 3;
+                } else {
+                    // TODO
+                    assert(0);
+                }
+                /* Filter edge */
+                // Do not use s->qscale as the luma quantiser because it does not
+                // have the same value in IPCM macroblocks.
+                qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
+                tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
+                { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
+                filter_mb_edgeh( h, &img_y[0], tmp_linesize, bS, qp );
+                chroma_qp = ( h->chroma_qp +
+                              get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
+                filter_mb_edgech( h, &img_cb[0], tmp_uvlinesize, bS, chroma_qp );
+                filter_mb_edgech( h, &img_cr[0], tmp_uvlinesize, bS, chroma_qp );
+
+                // second filtering
+                mbn_xy += s->mb_stride;
+                if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
+                    IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
+                    bS[0] = bS[1] = bS[2] = bS[3] = 3;
+                } else {
+                    // TODO
+                    assert(0);
+                }
+                /* Filter edge */
+                // Do not use s->qscale as the luma quantiser because it does not
+                // have the same value in IPCM macroblocks.
+                qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
+                tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
+                { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
+                filter_mb_edgeh( h, &img_y[linesize], tmp_linesize, bS, qp );
+                chroma_qp = ( h->chroma_qp +
+                              get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
+                filter_mb_edgech( h, &img_cb[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
+                filter_mb_edgech( h, &img_cr[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
+                continue;
+            }
             if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
                 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
-                bS[0] = bS[1] = bS[2] = bS[3] = ( edge == 0 ? 4 : 3 );
+                int value;
+                if (edge == 0) {
+                    if (   (!IS_INTERLACED(s->current_picture.mb_type[mb_xy]) && !IS_INTERLACED(s->current_picture.mb_type[mbm_xy]))
+                        || ((h->mb_aff_frame || (s->picture_structure != PICT_FRAME)) && (dir == 0))
+                    ) {
+                        value = 4;
+                    } else {
+                        value = 3;
+                    }
+                } else {
+                    value = 3;
+                }
+                bS[0] = bS[1] = bS[2] = bS[3] = value;
             } else {
                 int i;
                 for( i = 0; i < 4; i++ ) {
@@ -5587,16 +6186,21 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
                         h->non_zero_count_cache[bn_idx] != 0 ) {
                         bS[i] = 2;
                     }
-                    else if( h->slice_type == P_TYPE ) {
-                        if( h->ref_cache[0][b_idx] != h->ref_cache[0][bn_idx] ||
-                            ABS( h->mv_cache[0][b_idx][0] - h->mv_cache[0][bn_idx][0] ) >= 4 ||
-                            ABS( h->mv_cache[0][b_idx][1] - h->mv_cache[0][bn_idx][1] ) >= 4 )
-                            bS[i] = 1;
-                        else
-                            bS[i] = 0;
-                    } else {
-                        /* FIXME Add support for B frame */
-                        return;
+                    else
+                    {
+                        /* FIXME: A given frame may occupy more than one position in
+                         * the reference list. So we should compare the frame numbers,
+                         * not the indices in the ref list. */
+                        int l;
+                        bS[i] = 0;
+                        for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
+                            if( h->ref_cache[l][b_idx] != h->ref_cache[l][bn_idx] ||
+                                ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
+                                ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {
+                                bS[i] = 1;
+                                break;
+                            }
+                        }
                     }
                 }
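The bS selection spread over the loops above reduces to the following per-segment decision (sketched with a single reference list; the code above walks both lists for B slices, and the intra value is demoted from 4 to 3 on the MBAFF/field horizontal macroblock edges handled by the `value` logic above).

#include <stdlib.h>

/* Illustration only: boundary strength for one 4-sample edge segment. */
static int boundary_strength(int edge_is_mb_edge,
                             int either_mb_is_intra,
                             int either_side_has_coeffs,
                             int ref_a, int ref_b,
                             const int mv_a[2], const int mv_b[2])
{
    if (either_mb_is_intra)
        return edge_is_mb_edge ? 4 : 3;     /* strongest filtering around intra  */
    if (either_side_has_coeffs)
        return 2;
    if (ref_a != ref_b ||
        abs(mv_a[0] - mv_b[0]) >= 4 ||      /* quarter-pel: one full luma sample */
        abs(mv_a[1] - mv_b[1]) >= 4)
        return 1;
    return 0;                               /* nothing to filter on this segment */
}

int main(void)
{
    const int mv_a[2] = { 0, 0 }, mv_b[2] = { 6, 0 };
    return boundary_strength(0, 0, 0, 0, 0, mv_a, mv_b) == 1 ? 0 : 1;
}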
 
@@ -5605,13 +6209,17 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
             }
 
             /* Filter edge */
-            qp = ( s->qscale + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
+            // Do not use s->qscale as the luma quantiser because it does not
+            // have the same value in IPCM macroblocks.
+            qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
             //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
+            tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
+            { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); }
             if( dir == 0 ) {
                 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
                 if( (edge&1) == 0 ) {
                     int chroma_qp = ( h->chroma_qp +
-                                      get_chroma_qp( h, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
+                                      get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
                     filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
                     filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
                 }
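The per-edge quantiser used above is just the rounded average of the two macroblocks' stored QPs; the stored qscale_table value is used instead of s->qscale so that IPCM macroblocks, whose deblocking QP was set to 0 while decoding, average correctly. A one-function sketch:

#include <stdio.h>

static int edge_qp(int qp_current_mb, int qp_neighbour_mb)
{
    /* Rounded average; an IPCM neighbour contributes a stored QP of 0. */
    return (qp_current_mb + qp_neighbour_mb + 1) >> 1;
}

int main(void)
{
    printf("%d %d\n", edge_qp(26, 28), edge_qp(26, 0));  /* prints 27 13 */
    return 0;
}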
@@ -5619,7 +6227,7 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
                 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
                 if( (edge&1) == 0 ) {
                     int chroma_qp = ( h->chroma_qp +
-                                      get_chroma_qp( h, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
+                                      get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
                     filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
                     filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
                 }
@@ -5661,20 +6269,20 @@ static int decode_slice(H264Context *h){
 
         for(;;){
             int ret = decode_mb_cabac(h);
-            int eos = get_cabac_terminate( &h->cabac ); /* End of Slice flag */
+            int eos;
 
             if(ret>=0) hl_decode_mb(h);
 
             /* XXX: useless as decode_mb_cabac it doesn't support that ... */
-            if( ret >= 0 && h->sps.mb_aff ) { //FIXME optimal? or let mb_decode decode 16x32 ?
+            if( ret >= 0 && h->mb_aff_frame ) { //FIXME optimal? or let mb_decode decode 16x32 ?
                 s->mb_y++;
 
                 if(ret>=0) ret = decode_mb_cabac(h);
-                eos = get_cabac_terminate( &h->cabac );
 
                 hl_decode_mb(h);
                 s->mb_y--;
             }
+            eos = get_cabac_terminate( &h->cabac );
 
             if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 1) {
                 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
@@ -5686,6 +6294,9 @@ static int decode_slice(H264Context *h){
                 s->mb_x = 0;
                 ff_draw_horiz_band(s, 16*s->mb_y, 16);
                 ++s->mb_y;
+                if(h->mb_aff_frame) {
+                    ++s->mb_y;
+                }
             }
 
             if( eos || s->mb_y >= s->mb_height ) {
@@ -5693,13 +6304,6 @@ static int decode_slice(H264Context *h){
                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
                 return 0;
             }
-#if 0
-            /* TODO test over-reading in cabac code */
-            else if( read too much in h->cabac ) {
-                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
-                return -1;
-            }
-#endif
         }
 
     } else {
@@ -5708,7 +6312,7 @@ static int decode_slice(H264Context *h){
 
             if(ret>=0) hl_decode_mb(h);
 
-            if(ret>=0 && h->sps.mb_aff){ //FIXME optimal? or let mb_decode decode 16x32 ?
+            if(ret>=0 && h->mb_aff_frame){ //FIXME optimal? or let mb_decode decode 16x32 ?
                 s->mb_y++;
                 ret = decode_mb_cavlc(h);
 
@@ -5726,7 +6330,11 @@ static int decode_slice(H264Context *h){
             if(++s->mb_x >= s->mb_width){
                 s->mb_x=0;
                 ff_draw_horiz_band(s, 16*s->mb_y, 16);
-                if(++s->mb_y >= s->mb_height){
+                ++s->mb_y;
+                if(h->mb_aff_frame) {
+                    ++s->mb_y;
+                }
+                if(s->mb_y >= s->mb_height){
                     tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
 
                     if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
@@ -5804,9 +6412,27 @@ static int decode_slice(H264Context *h){
     return -1; //not reached
 }
 
+static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
+    MpegEncContext * const s = &h->s;
+    int cpb_count, i;
+    cpb_count = get_ue_golomb(&s->gb) + 1;
+    get_bits(&s->gb, 4); /* bit_rate_scale */
+    get_bits(&s->gb, 4); /* cpb_size_scale */
+    for(i=0; i<cpb_count; i++){
+        get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
+        get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
+        get_bits1(&s->gb);     /* cbr_flag */
+    }
+    get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
+    get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
+    get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
+    get_bits(&s->gb, 5); /* time_offset_length */
+}
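The HRD block above and the VUI fields below use get_ue_golomb() for their ue(v) syntax elements. For reference, this is the Exp-Golomb decoding it performs, shown with a toy MSB-first bit reader rather than the real GetBitContext:

#include <stdint.h>
#include <stdio.h>

struct toy_bits { const uint8_t *buf; unsigned pos; };  /* toy reader, not GetBitContext */

static unsigned get_bit(struct toy_bits *b)
{
    unsigned bit = (b->buf[b->pos >> 3] >> (7 - (b->pos & 7))) & 1;
    b->pos++;
    return bit;
}

/* ue(v): count leading zero bits, then read that many suffix bits. */
static unsigned get_ue(struct toy_bits *b)
{
    unsigned n = 0, i, suffix = 0;
    while (get_bit(b) == 0)
        n++;
    for (i = 0; i < n; i++)
        suffix = (suffix << 1) | get_bit(b);
    return (1u << n) - 1 + suffix;
}

int main(void)
{
    const uint8_t buf[2] = { 0x3A, 0x00 };  /* bits: 00111 010 ... */
    struct toy_bits b = { buf, 0 };
    unsigned first  = get_ue(&b);           /* "00111" -> 6        */
    unsigned second = get_ue(&b);           /* "010"   -> 1        */
    printf("%u %u\n", first, second);
    return 0;
}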
+
 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
     MpegEncContext * const s = &h->s;
     int aspect_ratio_info_present_flag, aspect_ratio_idc;
+    int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
 
     aspect_ratio_info_present_flag= get_bits1(&s->gb);
     
@@ -5853,29 +6479,27 @@ static inline int decode_vui_parameters(H264Context *h, SPS *sps){
         sps->fixed_frame_rate_flag = get_bits1(&s->gb);
     }
 
-#if 0
-| nal_hrd_parameters_present_flag                   |0  |u(1)    |
-| if( nal_hrd_parameters_present_flag  = =  1)      |   |        |
-|  hrd_parameters( )                                |   |        |
-| vcl_hrd_parameters_present_flag                   |0  |u(1)    |
-| if( vcl_hrd_parameters_present_flag  = =  1)      |   |        |
-|  hrd_parameters( )                                |   |        |
-| if( ( nal_hrd_parameters_present_flag  = =  1  | ||   |        |
-|                                                   |   |        |
-|( vcl_hrd_parameters_present_flag  = =  1 ) )      |   |        |
-|  low_delay_hrd_flag                               |0  |u(1)    |
-| bitstream_restriction_flag                        |0  |u(1)    |
-| if( bitstream_restriction_flag ) {                |0  |u(1)    |
-|  motion_vectors_over_pic_boundaries_flag          |0  |u(1)    |
-|  max_bytes_per_pic_denom                          |0  |ue(v)   |
-|  max_bits_per_mb_denom                            |0  |ue(v)   |
-|  log2_max_mv_length_horizontal                    |0  |ue(v)   |
-|  log2_max_mv_length_vertical                      |0  |ue(v)   |
-|  num_reorder_frames                               |0  |ue(v)   |
-|  max_dec_frame_buffering                          |0  |ue(v)   |
-| }                                                 |   |        |
-|}                                                  |   |        |
-#endif
+    nal_hrd_parameters_present_flag = get_bits1(&s->gb);
+    if(nal_hrd_parameters_present_flag)
+        decode_hrd_parameters(h, sps);
+    vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
+    if(vcl_hrd_parameters_present_flag)
+        decode_hrd_parameters(h, sps);
+    if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
+        get_bits1(&s->gb);     /* low_delay_hrd_flag */
+    get_bits1(&s->gb);         /* pic_struct_present_flag */
+
+    sps->bitstream_restriction_flag = get_bits1(&s->gb);
+    if(sps->bitstream_restriction_flag){
+        get_bits1(&s->gb);     /* motion_vectors_over_pic_boundaries_flag */
+        get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
+        get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
+        get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
+        get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
+        sps->num_reorder_frames = get_ue_golomb(&s->gb);
+        get_ue_golomb(&s->gb); /* max_dec_frame_buffering */
+    }
+
     return 0;
 }
 
@@ -6204,7 +6828,8 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
             if(s->flags& CODEC_FLAG_LOW_DELAY)
                 s->low_delay=1;
       
-            avctx->has_b_frames= !s->low_delay;
+            if(avctx->has_b_frames < 2)
+                avctx->has_b_frames= !s->low_delay;
             break;
         case NAL_PPS:
             init_get_bits(&s->gb, ptr, bit_length);
@@ -6219,13 +6844,12 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
        default:
            av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
         }        
-
-        //FIXME move after where irt is set
-        s->current_picture.pict_type= s->pict_type;
-        s->current_picture.key_frame= s->pict_type == I_TYPE;
     }
     
     if(!s->current_picture_ptr) return buf_index; //no frame
+
+    s->current_picture_ptr->pict_type= s->pict_type;
+    s->current_picture_ptr->key_frame= s->pict_type == I_TYPE && h->nal_unit_type == NAL_IDR_SLICE;
     
     h->prev_frame_num_offset= h->frame_num_offset;
     h->prev_frame_num= h->frame_num;
@@ -6344,41 +6968,77 @@ static int decode_frame(AVCodecContext *avctx,
     }
 
     {
-//#define DECODE_ORDER
         Picture *out = s->current_picture_ptr;
-#ifndef DECODE_ORDER
-        /* Sort B-frames into display order
-         * FIXME doesn't allow for multiple delayed frames */
+#if 0 //decode order
+        *data_size = sizeof(AVFrame);
+#else
+        /* Sort B-frames into display order */
         Picture *cur = s->current_picture_ptr;
-        Picture *prev = h->delayed_pic[0];
+        Picture *prev = h->delayed_output_pic;
+        int out_idx = 0;
+        int pics = 0;
+        int out_of_order;
+        int cross_idr = 0;
+        int dropped_frame = 0;
+        int i;
 
-        if(s->low_delay
-           && (cur->pict_type == B_TYPE
-           || (!h->sps.gaps_in_frame_num_allowed_flag
-               && prev && cur->poc - prev->poc > 2))){
+        if(h->sps.bitstream_restriction_flag
+           && s->avctx->has_b_frames < h->sps.num_reorder_frames){
+            s->avctx->has_b_frames = h->sps.num_reorder_frames;
             s->low_delay = 0;
-            s->avctx->has_b_frames = 1;
-            if(prev && prev->poc > cur->poc)
-                // too late to display this frame
-                cur = prev;
         }
 
-        if(s->low_delay || !prev || cur->pict_type == B_TYPE)
-            out = cur;
-        else
+        while(h->delayed_pic[pics]) pics++;
+        h->delayed_pic[pics++] = cur;
+        if(cur->reference == 0)
+            cur->reference = 1;
+
+        for(i=0; h->delayed_pic[i]; i++)
+            if(h->delayed_pic[i]->key_frame)
+                cross_idr = 1;
+
+        out = h->delayed_pic[0];
+        for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
+            if(h->delayed_pic[i]->poc < out->poc){
+                out = h->delayed_pic[i];
+                out_idx = i;
+            }
+
+        out_of_order = !cross_idr && prev && out->poc < prev->poc;
+        if(prev && pics <= s->avctx->has_b_frames)
+            out = prev;
+        else if((out_of_order && pics-1 == s->avctx->has_b_frames)
+           || (s->low_delay && 
+            ((!cross_idr && prev && out->poc > prev->poc + 2)
+             || cur->pict_type == B_TYPE)))
+        {
+            s->low_delay = 0;
+            s->avctx->has_b_frames++;
             out = prev;
-        if(s->low_delay || !prev || out == prev){
-            if(prev && prev->reference == 1)
-                prev->reference = 0;
-            h->delayed_pic[0] = cur;
         }
+        else if(out_of_order)
+            out = prev;
+
+        if(out_of_order || pics > s->avctx->has_b_frames){
+            dropped_frame = (out != h->delayed_pic[out_idx]);
+            for(i=out_idx; h->delayed_pic[i]; i++)
+                h->delayed_pic[i] = h->delayed_pic[i+1];
+        }
+
+        if(prev == out && !dropped_frame)
+            *data_size = 0;
+        else
+            *data_size = sizeof(AVFrame);
+        if(prev && prev != out && prev->reference == 1)
+            prev->reference = 0;
+        h->delayed_output_pic = out;
 #endif
 
         *pict= *(AVFrame*)out;
     }
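A heavily simplified, stand-alone sketch of the reordering idea implemented above: decoded pictures are parked in a small buffer and released in increasing POC order once more than has_b_frames of them are pending. The real code additionally handles IDR boundaries (cross_idr), dropped frames and the reference-flag bookkeeping, none of which is modelled here.

#include <stdio.h>

#define MAX_DELAYED 16

static int pending[MAX_DELAYED];
static int npending = 0;

static int reorder_output(int poc, int delay, int *out_poc)
{
    int i, best = 0;
    pending[npending++] = poc;
    if (npending <= delay)
        return 0;                              /* not enough pictures buffered */
    for (i = 1; i < npending; i++)
        if (pending[i] < pending[best])
            best = i;
    *out_poc = pending[best];
    pending[best] = pending[--npending];       /* remove it from the buffer    */
    return 1;
}

int main(void)
{
    int decode_order[] = { 0, 4, 2, 8, 6 };    /* IPBPB-style POCs             */
    int i, out;
    for (i = 0; i < 5; i++)
        if (reorder_output(decode_order[i], 1, &out))
            printf("output poc %d\n", out);    /* 0, 2, 4, 6                   */
    return 0;
}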
 
-    ff_print_debug_info(s, pict);
     assert(pict->data[0]);
+    ff_print_debug_info(s, pict);
 //printf("out %d\n", (int)pict->data[0]);
 #if 0 //?
 
@@ -6386,12 +7046,6 @@ static int decode_frame(AVCodecContext *avctx,
     /* we substract 1 because it is added on utils.c    */
     avctx->frame_number = s->picture_number - 1;
 #endif
-#if 0
-    /* dont output the last pic after seeking */
-    if(s->last_picture_ptr || s->low_delay)
-    //Note this isnt a issue as a IDR pic should flush the buffers
-#endif
-        *data_size = sizeof(AVFrame);
     return get_consumed_bytes(s, buf_index, buf_size);
 }
 #if 0