Minimal support for the new pixel formats in libavcodec

[ffmpeg] / libavcodec / h264.c
diff --git a/libavcodec/h264.c b/libavcodec/h264.c

index ad25c7c99ff4bf64554f76357df780b148fb01d5..4f0f875de71bbb762134d63ac4bdee52f5e95496 100644 (file)
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -409,6 +409,7 @@ static VLC run7_vlc;
  static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
  static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
  static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
+static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
  
  static always_inline uint32_t pack16to32(int a, int b){
  #ifdef WORDS_BIGENDIAN
@@ -514,10 +515,8 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){
      int left_block[8];
      int i;
  
-    //FIXME deblocking can skip fill_caches much of the time with multiple slices too.
-    // the actual condition is whether we're on the edge of a slice,
-    // and even then the intra and nnz parts are unnecessary.
-    if(for_deblock && h->slice_num == 1 && !FRAME_MBAFF)
+    //FIXME deblocking could skip the intra and nnz parts.
+    if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
          return;
  
      //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
@@ -604,9 +603,9 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){
      h->left_mb_xy[0] = left_xy[0];
      h->left_mb_xy[1] = left_xy[1];
      if(for_deblock){
-        topleft_type = h->slice_table[topleft_xy ] < 255 ? s->current_picture.mb_type[topleft_xy] : 0;
+        topleft_type = 0;
+        topright_type = 0;
          top_type     = h->slice_table[top_xy     ] < 255 ? s->current_picture.mb_type[top_xy]     : 0;
-        topright_type= h->slice_table[topright_xy] < 255 ? s->current_picture.mb_type[topright_xy]: 0;
          left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
          left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
  
@@ -781,7 +780,6 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){
      }
  
  #if 1
-    //FIXME direct mb can skip much of this
      if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
          int list;
          for(list=0; list<1+(h->slice_type==B_TYPE); list++){
@@ -844,7 +842,7 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){
                  assert((!left_type[0]) == (!left_type[1]));
              }
  
-            if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred))
+            if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
                  continue;
  
              if(USES_LIST(topleft_type, list)){
@@ -867,6 +865,8 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){
                  h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
              }
  
+            if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
+                continue;
  
              h->ref_cache[list][scan8[5 ]+1] =
              h->ref_cache[list][scan8[7 ]+1] =
@@ -3880,7 +3880,7 @@ static void hl_decode_mb(H264Context *h){
              tprintf("call filter_mb\n");
              backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
              fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
-            filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
+            filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
          }
      }
  }
@@ -6380,8 +6380,8 @@ decode_intra_mb:
                  sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
                  h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
              }
-            if(   IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
-               || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
+            if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
+                          h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
                  pred_direct_motion(h, &mb_type);
                  if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
                      for( i = 0; i < 4; i++ )
@@ -6695,7 +6695,7 @@ decode_intra_mb:
  }
  
  
-static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
+static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
      int i, d;
      const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
      const int alpha = alpha_table[index_a];
@@ -6756,7 +6756,7 @@ static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4]
              }
      }
  }
-static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
+static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
      int i;
      const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
      const int alpha = alpha_table[index_a];
@@ -6772,7 +6772,7 @@ static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4
      }
  }
  
-static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) {
+static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
      int i;
      for( i = 0; i < 16; i++, pix += stride) {
          int index_a;
@@ -6870,7 +6870,7 @@ static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int
          }
      }
  }
-static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) {
+static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
      int i;
      for( i = 0; i < 8; i++, pix += stride) {
          int index_a;
@@ -6923,7 +6923,7 @@ static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, in
      }
  }
  
-static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
+static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
      int i, d;
      const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
      const int alpha = alpha_table[index_a];
@@ -6983,7 +6983,7 @@ static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4]
      }
  }
  
-static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
+static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
      int i;
      const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
      const int alpha = alpha_table[index_a];
@@ -6999,6 +6999,108 @@ static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4
      }
  }
  
+static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
+    MpegEncContext * const s = &h->s;
+    int mb_xy, mb_type;
+    int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
+
+    if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength) {
+        filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
+        return;
+    }
+    assert(!FRAME_MBAFF);
+
+    mb_xy = mb_x + mb_y*s->mb_stride;
+    mb_type = s->current_picture.mb_type[mb_xy];
+    qp = s->current_picture.qscale_table[mb_xy];
+    qp0 = s->current_picture.qscale_table[mb_xy-1];
+    qp1 = s->current_picture.qscale_table[h->top_mb_xy];
+    qpc = get_chroma_qp( h->pps.chroma_qp_index_offset, qp );
+    qpc0 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp0 );
+    qpc1 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp1 );
+    qp0 = (qp + qp0 + 1) >> 1;
+    qp1 = (qp + qp1 + 1) >> 1;
+    qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset);
+    if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh)
+        return;
+    qpc0 = (qpc + qpc0 + 1) >> 1;
+    qpc1 = (qpc + qpc1 + 1) >> 1;
+
+    if( IS_INTRA(mb_type) ) {
+        int16_t bS4[4] = {4,4,4,4};
+        int16_t bS3[4] = {3,3,3,3};
+        if( IS_8x8DCT(mb_type) ) {
+            filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
+            filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
+            filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
+            filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
+        } else {
+            filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
+            filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
+            filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
+            filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
+            filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
+            filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
+            filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
+            filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
+        }
+        filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
+        filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
+        filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
+        filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
+        filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
+        filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
+        filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
+        filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
+        return;
+    } else {
+        DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
+        uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
+        int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
+                            == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
+        int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
+                         (mb_type & MB_TYPE_16x8) ? 1 : 0;
+        int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
+                         && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
+                         ? 3 : 0;
+        int step = IS_8x8DCT(mb_type) ? 2 : 1;
+        s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
+                                          (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
+        if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
+            bSv[0][0] = 0x0004000400040004ULL;
+        if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
+            bSv[1][0] = 0x0004000400040004ULL;
+
+#define FILTER(hv,dir,edge)\
+        if(bSv[dir][edge]) {\
+            filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
+            if(!(edge&1)) {\
+                filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
+                filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
+            }\
+        }
+        if( edges == 1 ) {
+            FILTER(v,0,0);
+            FILTER(h,1,0);
+        } else if( IS_8x8DCT(mb_type) ) {
+            FILTER(v,0,0);
+            FILTER(v,0,2);
+            FILTER(h,1,0);
+            FILTER(h,1,2);
+        } else {
+            FILTER(v,0,0);
+            FILTER(v,0,1);
+            FILTER(v,0,2);
+            FILTER(v,0,3);
+            FILTER(h,1,0);
+            FILTER(h,1,1);
+            FILTER(h,1,2);
+            FILTER(h,1,3);
+        }
+#undef FILTER
+    }
+}
+
  static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
      MpegEncContext * const s = &h->s;
      const int mb_xy= mb_x + mb_y*s->mb_stride;
@@ -7036,7 +7138,7 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
           */
          const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
          const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
-        int bS[8];
+        int16_t bS[8];
          int qp[2];
          int chroma_qp[2];
          int mb_qp, mbn0_qp, mbn1_qp;
@@ -7115,7 +7217,7 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
              int mbn_xy = mb_xy - 2 * s->mb_stride;
              int qp, chroma_qp;
              int i, j;
-            int bS[4];
+            int16_t bS[4];
  
              for(j=0; j<2; j++, mbn_xy += s->mb_stride){
                  if( IS_INTRA(mb_type) ||
@@ -7151,7 +7253,7 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
              /* mbn_xy: neighbor macroblock */
              const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
              const int mbn_type = s->current_picture.mb_type[mbn_xy];
-            int bS[4];
+            int16_t bS[4];
              int qp;
  
              if( (edge&1) && IS_8x8DCT(mb_type) )