Merge remote-tracking branch 'qatar/master'

[ffmpeg] / libavcodec / h264.c
diff --git a/libavcodec/h264.c b/libavcodec/h264.c

index 1655a41988e68d8c27e17861298e86bee65c3ae5..117cd7e630a2fd9ee4bc3d6d30962270f530544e 100644 (file)
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -48,6 +48,8 @@
  // #undef NDEBUG
  #include <assert.h>
  
+const uint16_t ff_h264_mb_sizes[4] = { 256, 384, 512, 768 };
+
  static const uint8_t rem6[QP_MAX_NUM + 1] = {
      0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
      3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
@@ -271,7 +273,7 @@ nsc:
   * Identify the exact end of the bitstream
   * @return the length of the trailing, or 0 if damaged
   */
-static int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src)
+static int decode_rbsp_trailing(H264Context *h, const uint8_t *src)
  {
      int v = *src;
      int r;
@@ -1249,8 +1251,10 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx)
      }
  
      if (avctx->extradata_size > 0 && avctx->extradata &&
-        ff_h264_decode_extradata(h, avctx->extradata, avctx->extradata_size) < 0)
+        ff_h264_decode_extradata(h, avctx->extradata, avctx->extradata_size) < 0) {
+        ff_h264_free_context(h);
          return -1;
+    }
  
      if (h->sps.bitstream_restriction_flag &&
          s->avctx->has_b_frames < h->sps.num_reorder_frames) {
@@ -2126,7 +2130,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple,
          if (pixel_shift) {
              int j;
              GetBitContext gb;
-            init_get_bits(&gb, (uint8_t *)h->mb, 384 * bit_depth);
+            init_get_bits(&gb, (uint8_t *)h->mb,
+                          ff_h264_mb_sizes[h->sps.chroma_format_idc] * bit_depth);
  
              for (i = 0; i < 16; i++) {
                  uint16_t *tmp_y = (uint16_t *)(dest_y + i * linesize);
@@ -2157,7 +2162,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple,
              }
          } else {
              for (i = 0; i < 16; i++)
-                memcpy(dest_y + i * linesize, h->mb + i * 8, 16);
+                memcpy(dest_y + i * linesize, (uint8_t *)h->mb + i * 16, 16);
              if (simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
                  if (!h->sps.chroma_format_idc) {
                      for (i = 0; i < 8; i++) {
@@ -2165,9 +2170,11 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple,
                          memset(dest_cr + i*uvlinesize, 1 << (bit_depth - 1), 8);
                      }
                  } else {
+                    uint8_t *src_cb = (uint8_t *)h->mb + 256;
+                    uint8_t *src_cr = (uint8_t *)h->mb + 256 + block_h * 8;
                      for (i = 0; i < block_h; i++) {
-                        memcpy(dest_cb + i * uvlinesize, h->mb + 128 + i * 4, 8);
-                        memcpy(dest_cr + i * uvlinesize, h->mb + 160 + i * 4, 8);
+                        memcpy(dest_cb + i * uvlinesize, src_cb + i * 8, 8);
+                        memcpy(dest_cr + i * uvlinesize, src_cr + i * 8, 8);
                      }
                  }
              }
@@ -2179,8 +2186,10 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple,
                                 uvlinesize, 1, 0, simple, pixel_shift);
  
              if (simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
-                h->hpc.pred8x8[h->chroma_pred_mode](dest_cb, uvlinesize);
-                h->hpc.pred8x8[h->chroma_pred_mode](dest_cr, uvlinesize);
+                if (CHROMA) {
+                    h->hpc.pred8x8[h->chroma_pred_mode](dest_cb, uvlinesize);
+                    h->hpc.pred8x8[h->chroma_pred_mode](dest_cr, uvlinesize);
+                }
              }
  
              hl_decode_mb_predict_luma(h, mb_type, is_h264, simple,
@@ -2356,7 +2365,8 @@ static av_always_inline void hl_decode_mb_444_internal(H264Context *h,
          } else {
              for (p = 0; p < plane_count; p++)
                  for (i = 0; i < 16; i++)
-                    memcpy(dest[p] + i * linesize, h->mb + p * 128 + i * 8, 16);
+                    memcpy(dest[p] + i * linesize,
+                           (uint8_t *)h->mb + p * 256 + i * 16, 16);
          }
      } else {
          if (IS_INTRA(mb_type)) {
@@ -2406,17 +2416,17 @@ hl_decode_mb_simple(1, 16)
  /**
   * Process a macroblock; this handles edge cases, such as interlacing.
   */
-static void av_noinline hl_decode_mb_complex(H264Context *h)
+static av_noinline void hl_decode_mb_complex(H264Context *h)
  {
      hl_decode_mb_internal(h, 0, h->pixel_shift);
  }
  
-static void av_noinline hl_decode_mb_444_complex(H264Context *h)
+static av_noinline void hl_decode_mb_444_complex(H264Context *h)
  {
      hl_decode_mb_444_internal(h, 0, h->pixel_shift);
  }
  
-static void av_noinline hl_decode_mb_444_simple(H264Context *h)
+static av_noinline void hl_decode_mb_444_simple(H264Context *h)
  {
      hl_decode_mb_444_internal(h, 1, 0);
  }
@@ -2857,6 +2867,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0)
      unsigned int slice_type, tmp, i, j;
      int default_ref_list_done = 0;
      int last_pic_structure, last_pic_dropable;
+    int must_reinit;
  
      /* FIXME: 2tap qpel isn't implemented for high bit depth. */
      if ((s->avctx->flags2 & CODEC_FLAG2_FAST) &&
@@ -2935,6 +2946,19 @@ static int decode_slice_header(H264Context *h, H264Context *h0)
      s->avctx->level   = h->sps.level_idc;
      s->avctx->refs    = h->sps.ref_frame_count;
  
+    must_reinit = (s->context_initialized &&
+                    (   16*h->sps.mb_width != s->avctx->coded_width
+                     || 16*h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag) != s->avctx->coded_height
+                     || s->avctx->bits_per_raw_sample != h->sps.bit_depth_luma
+                     || h->cur_chroma_format_idc != h->sps.chroma_format_idc
+                     || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio)));
+
+    if(must_reinit && (h != h0 || (s->avctx->active_thread_type & FF_THREAD_FRAME))) {
+        av_log_missing_feature(s->avctx,
+                                "Width/height/bit depth/chroma idc changing with threads is", 0);
+        return -1;   // width / height changed during parallelized decoding
+    }
+
      s->mb_width  = h->sps.mb_width;
      s->mb_height = h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
  
@@ -2946,20 +2970,12 @@ static int decode_slice_header(H264Context *h, H264Context *h0)
      s->height = 16 * s->mb_height;
  
  
-    if (s->context_initialized &&
-        (   s->width != s->avctx->coded_width || s->height != s->avctx->coded_height
-            || s->avctx->bits_per_raw_sample != h->sps.bit_depth_luma
-            || h->cur_chroma_format_idc != h->sps.chroma_format_idc
-            || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) {
-        if(h != h0 || (s->avctx->active_thread_type & FF_THREAD_FRAME)) {
-            av_log_missing_feature(s->avctx,
-                                   "Width/height/bit depth/chroma idc changing with threads is", 0);
-            return -1;   // width / height changed during parallelized decoding
-        }
+    if(must_reinit) {
          free_tables(h, 0);
          flush_dpb(s->avctx);
          ff_MPV_common_end(s);
          h->list_count = 0;
+        h->current_slice = 0;
      }
      if (!s->context_initialized) {
          if (h != h0) {
@@ -3629,7 +3645,7 @@ static av_always_inline void fill_filter_caches_inter(H264Context *h,
          if (USES_LIST(top_type, list)) {
              const int b_xy  = h->mb2b_xy[top_xy] + 3 * b_stride;
              const int b8_xy = 4 * top_xy + 2;
-            int (*ref2frm)[64] = h->ref2frm[h->slice_table[top_xy] & (MAX_SLICES - 1)][0] + (MB_MBAFF ? 20 : 2);
+            int (*ref2frm)[64] = (void*)(h->ref2frm[h->slice_table[top_xy] & (MAX_SLICES - 1)][0] + (MB_MBAFF ? 20 : 2));
              AV_COPY128(mv_dst - 1 * 8, s->current_picture.f.motion_val[list][b_xy + 0]);
              ref_cache[0 - 1 * 8] =
              ref_cache[1 - 1 * 8] = ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 0]];
@@ -3644,7 +3660,7 @@ static av_always_inline void fill_filter_caches_inter(H264Context *h,
              if (USES_LIST(left_type[LTOP], list)) {
                  const int b_xy  = h->mb2b_xy[left_xy[LTOP]] + 3;
                  const int b8_xy = 4 * left_xy[LTOP] + 1;
-                int (*ref2frm)[64] = h->ref2frm[h->slice_table[left_xy[LTOP]] & (MAX_SLICES - 1)][0] + (MB_MBAFF ? 20 : 2);
+                int (*ref2frm)[64] =(void*)( h->ref2frm[h->slice_table[left_xy[LTOP]] & (MAX_SLICES - 1)][0] + (MB_MBAFF ? 20 : 2));
                  AV_COPY32(mv_dst - 1 +  0, s->current_picture.f.motion_val[list][b_xy + b_stride * 0]);
                  AV_COPY32(mv_dst - 1 +  8, s->current_picture.f.motion_val[list][b_xy + b_stride * 1]);
                  AV_COPY32(mv_dst - 1 + 16, s->current_picture.f.motion_val[list][b_xy + b_stride * 2]);
@@ -3677,7 +3693,7 @@ static av_always_inline void fill_filter_caches_inter(H264Context *h,
  
      {
          int8_t *ref = &s->current_picture.f.ref_index[list][4 * mb_xy];
-        int (*ref2frm)[64] = h->ref2frm[h->slice_num & (MAX_SLICES - 1)][0] + (MB_MBAFF ? 20 : 2);
+        int (*ref2frm)[64] = (void*)(h->ref2frm[h->slice_num & (MAX_SLICES - 1)][0] + (MB_MBAFF ? 20 : 2));
          uint32_t ref01 = (pack16to32(ref2frm[list][ref[0]], ref2frm[list][ref[1]]) & 0x00FF00FF) * 0x0101;
          uint32_t ref23 = (pack16to32(ref2frm[list][ref[2]], ref2frm[list][ref[3]]) & 0x00FF00FF) * 0x0101;
          AV_WN32A(&ref_cache[0 * 8], ref01);
@@ -4253,7 +4269,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size)
                      dst_length--;
              bit_length = !dst_length ? 0
                                       : (8 * dst_length -
-                                        ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
+                                        decode_rbsp_trailing(h, ptr + dst_length - 1));
  
              if (s->avctx->debug & FF_DEBUG_STARTCODE)
                  av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d/%d at %d/%d length %d pass %d\n", hx->nal_unit_type, hx->nal_ref_idc, buf_index, buf_size, dst_length, pass);
@@ -4599,7 +4615,7 @@ av_cold void ff_h264_free_context(H264Context *h)
          av_freep(h->pps_buffers + i);
  }
  
-av_cold int ff_h264_decode_end(AVCodecContext *avctx)
+static av_cold int h264_decode_end(AVCodecContext *avctx)
  {
      H264Context *h    = avctx->priv_data;
      MpegEncContext *s = &h->s;
@@ -4657,7 +4673,7 @@ AVCodec ff_h264_decoder = {
      .id                    = CODEC_ID_H264,
      .priv_data_size        = sizeof(H264Context),
      .init                  = ff_h264_decode_init,
-    .close                 = ff_h264_decode_end,
+    .close                 = h264_decode_end,
      .decode                = decode_frame,
      .capabilities          = /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 |
                               CODEC_CAP_DELAY | CODEC_CAP_SLICE_THREADS |
@@ -4677,7 +4693,7 @@ AVCodec ff_h264_vdpau_decoder = {
      .id             = CODEC_ID_H264,
      .priv_data_size = sizeof(H264Context),
      .init           = ff_h264_decode_init,
-    .close          = ff_h264_decode_end,
+    .close          = h264_decode_end,
      .decode         = decode_frame,
      .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
      .flush          = flush_dpb,