h264: Copy h264chroma dsp context to slice thread copies
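
The per-slice thread contexts set up in h264_slice_header_init() are created by
memcpy()ing only the embedded MpegEncContext and zeroing the rest of the
H264Context (see that hunk below), so the H.264-specific DSP contexts have to be
copied over explicitly. h264dsp was already handled; this change also copies the
H264ChromaContext (the same hunk copies the new h264qpel context as well), since
a slice thread would otherwise run with a zeroed chroma MC function table.

A minimal illustrative sketch of the pattern, using simplified stand-in types
rather than the real FFmpeg structs:

    #include <stdio.h>
    #include <string.h>

    typedef struct BaseContext   { int width, height; } BaseContext;
    typedef struct ChromaContext { void (*mc)(const char *what); } ChromaContext;
    typedef struct DecContext {
        BaseContext   s;       /* first member, stands in for MpegEncContext inside H264Context */
        ChromaContext chroma;  /* stands in for H264ChromaContext */
    } DecContext;

    static void chroma_mc(const char *what) { printf("chroma MC for %s\n", what); }

    int main(void)
    {
        DecContext main_ctx = { .s = { 1920, 1080 }, .chroma = { chroma_mc } };
        DecContext slice_ctx;

        /* clone the way h264_slice_header_init() does: base part only, rest zeroed */
        memcpy(&slice_ctx, &main_ctx, sizeof(BaseContext));
        memset((char *)&slice_ctx + sizeof(BaseContext), 0,
               sizeof(DecContext) - sizeof(BaseContext));

        /* the fix: copy the function-pointer table explicitly, otherwise the
         * slice thread would call through a NULL pointer during chroma MC */
        slice_ctx.chroma = main_ctx.chroma;
        slice_ctx.chroma.mc("slice thread");
        return 0;
    }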

diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index ad3f15c3f3779302eb8cc70a502f031da8f0f55d..542070be938436f505216528c72e9777fe3293e2 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
 #include "mpegvideo.h"
 #include "h264.h"
 #include "h264data.h"
+#include "h264chroma.h"
 #include "h264_mvpred.h"
 #include "golomb.h"
 #include "mathops.h"
 #include "rectangle.h"
+#include "svq3.h"
 #include "thread.h"
 #include "vdpau_internal.h"
 #include "libavutil/avassert.h"
@@ -60,9 +62,18 @@ static const uint8_t div6[QP_MAX_NUM + 1] = {
 };
 
 static const enum AVPixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = {
+#if CONFIG_H264_DXVA2_HWACCEL
     AV_PIX_FMT_DXVA2_VLD,
+#endif
+#if CONFIG_H264_VAAPI_HWACCEL
     AV_PIX_FMT_VAAPI_VLD,
+#endif
+#if CONFIG_H264_VDA_HWACCEL
     AV_PIX_FMT_VDA_VLD,
+#endif
+#if CONFIG_H264_VDPAU_HWACCEL
+    AV_PIX_FMT_VDPAU,
+#endif
     AV_PIX_FMT_YUVJ420P,
     AV_PIX_FMT_NONE
 };
@@ -290,10 +301,11 @@ static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n,
                                          int height, int y_offset, int list)
 {
     int raw_my        = h->mv_cache[list][scan8[n]][1];
-    int filter_height = (raw_my & 3) ? 2 : 0;
+    int filter_height_up   = (raw_my & 3) ? 2 : 0;
+    int filter_height_down = (raw_my & 3) ? 3 : 0;
     int full_my       = (raw_my >> 2) + y_offset;
-    int top           = full_my - filter_height;
-    int bottom        = full_my + filter_height + height;
+    int top           = full_my - filter_height_up;
+    int bottom        = full_my + filter_height_down + height;
 
     return FFMAX(abs(top), bottom);
 }
@@ -486,11 +498,11 @@ static av_always_inline void mc_dir_part(H264Context *h, Picture *pic,
         full_my                <          0 - extra_height ||
         full_mx + 16 /*FIXME*/ > pic_width  + extra_width  ||
         full_my + 16 /*FIXME*/ > pic_height + extra_height) {
-        s->dsp.emulated_edge_mc(s->edge_emu_buffer,
-                                src_y - (2 << pixel_shift) - 2 * h->mb_linesize,
-                                h->mb_linesize,
-                                16 + 5, 16 + 5 /*FIXME*/, full_mx - 2,
-                                full_my - 2, pic_width, pic_height);
+        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
+                                 src_y - (2 << pixel_shift) - 2 * h->mb_linesize,
+                                 h->mb_linesize,
+                                 16 + 5, 16 + 5 /*FIXME*/, full_mx - 2,
+                                 full_my - 2, pic_width, pic_height);
         src_y = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
         emu   = 1;
     }
@@ -505,12 +517,12 @@ static av_always_inline void mc_dir_part(H264Context *h, Picture *pic,
     if (chroma_idc == 3 /* yuv444 */) {
         src_cb = pic->f.data[1] + offset;
         if (emu) {
-            s->dsp.emulated_edge_mc(s->edge_emu_buffer,
-                                    src_cb - (2 << pixel_shift) - 2 * h->mb_linesize,
-                                    h->mb_linesize,
-                                    16 + 5, 16 + 5 /*FIXME*/,
-                                    full_mx - 2, full_my - 2,
-                                    pic_width, pic_height);
+            s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
+                                     src_cb - (2 << pixel_shift) - 2 * h->mb_linesize,
+                                     h->mb_linesize,
+                                     16 + 5, 16 + 5 /*FIXME*/,
+                                     full_mx - 2, full_my - 2,
+                                     pic_width, pic_height);
             src_cb = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
         }
         qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); // FIXME try variable height perhaps?
@@ -519,12 +531,12 @@ static av_always_inline void mc_dir_part(H264Context *h, Picture *pic,
 
         src_cr = pic->f.data[2] + offset;
         if (emu) {
-            s->dsp.emulated_edge_mc(s->edge_emu_buffer,
-                                    src_cr - (2 << pixel_shift) - 2 * h->mb_linesize,
-                                    h->mb_linesize,
-                                    16 + 5, 16 + 5 /*FIXME*/,
-                                    full_mx - 2, full_my - 2,
-                                    pic_width, pic_height);
+            s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
+                                     src_cr - (2 << pixel_shift) - 2 * h->mb_linesize,
+                                     h->mb_linesize,
+                                     16 + 5, 16 + 5 /*FIXME*/,
+                                     full_mx - 2, full_my - 2,
+                                     pic_width, pic_height);
             src_cr = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
         }
         qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); // FIXME try variable height perhaps?
@@ -546,9 +558,9 @@ static av_always_inline void mc_dir_part(H264Context *h, Picture *pic,
              (my >> ysh) * h->mb_uvlinesize;
 
     if (emu) {
-        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize,
-                                9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
-                                pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
+        s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize,
+                                 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
+                                 pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
         src_cb = s->edge_emu_buffer;
     }
     chroma_op(dest_cb, src_cb, h->mb_uvlinesize,
@@ -556,9 +568,9 @@ static av_always_inline void mc_dir_part(H264Context *h, Picture *pic,
               mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7);
 
     if (emu) {
-        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize,
-                                9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
-                                pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
+        s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize,
+                                 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
+                                 pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
         src_cr = s->edge_emu_buffer;
     }
     chroma_op(dest_cr, src_cr, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
@@ -735,15 +747,15 @@ static av_always_inline void prefetch_motion(H264Context *h, int list,
         int off       = (mx << pixel_shift) +
                         (my + (s->mb_x & 3) * 4) * h->mb_linesize +
                         (64 << pixel_shift);
-        s->dsp.prefetch(src[0] + off, s->linesize, 4);
+        s->vdsp.prefetch(src[0] + off, s->linesize, 4);
         if (chroma_idc == 3 /* yuv444 */) {
-            s->dsp.prefetch(src[1] + off, s->linesize, 4);
-            s->dsp.prefetch(src[2] + off, s->linesize, 4);
+            s->vdsp.prefetch(src[1] + off, s->linesize, 4);
+            s->vdsp.prefetch(src[2] + off, s->linesize, 4);
         } else {
             off = ((mx >> 1) << pixel_shift) +
                   ((my >> 1) + (s->mb_x & 7)) * s->uvlinesize +
                   (64 << pixel_shift);
-            s->dsp.prefetch(src[1] + off, src[2] - src[1], 2);
+            s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
         }
     }
 }
@@ -966,6 +978,8 @@ static av_cold void common_init(H264Context *h)
     s->codec_id = s->avctx->codec->id;
 
     ff_h264dsp_init(&h->h264dsp, 8, 1);
+    ff_h264chroma_init(&h->h264chroma, h->sps.bit_depth_chroma);
+    ff_h264qpel_init(&h->h264qpel, 8);
     ff_h264_pred_init(&h->hpc, s->codec_id, 8, 1);
 
     h->dequant_coeff_pps = -1;
@@ -973,6 +987,7 @@ static av_cold void common_init(H264Context *h)
 
     /* needed so that IDCT permutation is known early */
     ff_dsputil_init(&s->dsp, s->avctx);
+    ff_videodsp_init(&s->vdsp, 8);
 
     memset(h->pps.scaling_matrix4, 16, 6 * 16 * sizeof(uint8_t));
     memset(h->pps.scaling_matrix8, 16, 2 * 64 * sizeof(uint8_t));
@@ -1251,7 +1266,7 @@ static int decode_update_thread_context(AVCodecContext *dst,
 
     // reference lists
     copy_fields(h, h1, ref_count, list_count);
-    copy_fields(h, h1, ref_list, intra_gb);
+    copy_fields(h, h1, ref2frm, intra_gb);
     copy_fields(h, h1, short_ref, cabac_init_idc);
 
     copy_picture_range(h->short_ref, h1->short_ref, 32, s, s1);
@@ -1403,7 +1418,6 @@ static void decode_postinit(H264Context *h, int setup_finished)
             cur->f.repeat_pict = 1;
             break;
         case SEI_PIC_STRUCT_FRAME_DOUBLING:
-            // Force progressive here, doubling interlaced frame is a bad idea.
             cur->f.repeat_pict = 2;
             break;
         case SEI_PIC_STRUCT_FRAME_TRIPLING:
@@ -1748,7 +1762,7 @@ static av_always_inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
     }
 }
 
-static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth,
+static av_always_inline int dctcoef_get(int16_t *mb, int high_bit_depth,
                                         int index)
 {
     if (high_bit_depth) {
@@ -1757,7 +1771,7 @@ static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth,
         return AV_RN16A(mb + index);
 }
 
-static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth,
+static av_always_inline void dctcoef_set(int16_t *mb, int high_bit_depth,
                                          int index, int value)
 {
     if (high_bit_depth) {
@@ -1776,8 +1790,8 @@ static av_always_inline void hl_decode_mb_predict_luma(H264Context *h,
                                                        uint8_t *dest_y, int p)
 {
     MpegEncContext *const s = &h->s;
-    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
-    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
+    void (*idct_add)(uint8_t *dst, int16_t *block, int stride);
+    void (*idct_dc_add)(uint8_t *dst, int16_t *block, int stride);
     int i;
     int qscale = p == 0 ? s->qscale : h->chroma_qp[p - 1];
     block_offset += 16 * p;
@@ -1893,7 +1907,7 @@ static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type,
                                                     uint8_t *dest_y, int p)
 {
     MpegEncContext *const s = &h->s;
-    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
+    void (*idct_add)(uint8_t *dst, int16_t *block, int stride);
     int i;
     block_offset += 16 * p;
     if (!IS_INTRA4x4(mb_type)) {
@@ -2367,7 +2381,6 @@ static int clone_slice(H264Context *dst, H264Context *src)
     memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
     memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
     memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
-    memcpy(dst->ref_list,         src->ref_list,         sizeof(dst->ref_list));
 
     memcpy(dst->dequant4_coeff,   src->dequant4_coeff,   sizeof(src->dequant4_coeff));
     memcpy(dst->dequant8_coeff,   src->dequant8_coeff,   sizeof(src->dequant8_coeff));
@@ -2435,10 +2448,13 @@ static int h264_set_parameter_from_sps(H264Context *h)
 
             ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma,
                             h->sps.chroma_format_idc);
+            ff_h264chroma_init(&h->h264chroma, h->sps.bit_depth_chroma);
+            ff_h264qpel_init(&h->h264qpel, h->sps.bit_depth_luma);
             ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma,
                               h->sps.chroma_format_idc);
             s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16;
             ff_dsputil_init(&s->dsp, s->avctx);
+            ff_videodsp_init(&s->vdsp, h->sps.bit_depth_luma);
         } else {
             av_log(s->avctx, AV_LOG_ERROR, "Unsupported bit depth: %d\n",
                    h->sps.bit_depth_luma);
@@ -2525,7 +2541,7 @@ static int h264_slice_header_init(H264Context *h, int reinit)
             return ret;
         }
     } else {
-        if ((ret = ff_MPV_common_init(s) < 0)) {
+        if ((ret = ff_MPV_common_init(s)) < 0) {
             av_log(h->s.avctx, AV_LOG_ERROR, "ff_MPV_common_init() failed.\n");
             return ret;
         }
@@ -2552,6 +2568,8 @@ static int h264_slice_header_init(H264Context *h, int reinit)
             memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
             memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
             c->h264dsp     = h->h264dsp;
+            c->h264qpel    = h->h264qpel;
+            c->h264chroma  = h->h264chroma;
             c->sps         = h->sps;
             c->pps         = h->pps;
             c->pixel_shift = h->pixel_shift;
@@ -2590,17 +2608,9 @@ static int decode_slice_header(H264Context *h, H264Context *h0)
     int default_ref_list_done = 0;
     int last_pic_structure, last_pic_droppable;
     int needs_reinit = 0;
-    enum AVPixelFormat pix_fmt;
 
-    /* FIXME: 2tap qpel isn't implemented for high bit depth. */
-    if ((s->avctx->flags2 & CODEC_FLAG2_FAST) &&
-        !h->nal_ref_idc && !h->pixel_shift) {
-        s->me.qpel_put = s->dsp.put_2tap_qpel_pixels_tab;
-        s->me.qpel_avg = s->dsp.avg_2tap_qpel_pixels_tab;
-    } else {
-        s->me.qpel_put = s->dsp.put_h264_qpel_pixels_tab;
-        s->me.qpel_avg = s->dsp.avg_h264_qpel_pixels_tab;
-    }
+    s->me.qpel_put = h->h264qpel.put_h264_qpel_pixels_tab;
+    s->me.qpel_avg = h->h264qpel.avg_h264_qpel_pixels_tab;
 
     first_mb_in_slice = get_ue_golomb(&s->gb);
 
@@ -2667,8 +2677,14 @@ static int decode_slice_header(H264Context *h, H264Context *h0)
     if (h->pps.sps_id != h->current_sps_id ||
         h->context_reinitialized           ||
         h0->sps_buffers[h->pps.sps_id]->new) {
+        SPS *new_sps = h0->sps_buffers[h->pps.sps_id];
+
         h0->sps_buffers[h->pps.sps_id]->new = 0;
 
+        if (h->sps.chroma_format_idc != new_sps->chroma_format_idc ||
+            h->sps.bit_depth_luma    != new_sps->bit_depth_luma)
+            needs_reinit = 1;
+
         h->current_sps_id = h->pps.sps_id;
         h->sps            = *h0->sps_buffers[h->pps.sps_id];
 
@@ -2707,24 +2723,17 @@ static int decode_slice_header(H264Context *h, H264Context *h0)
         s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG
                                                   : AVCOL_RANGE_MPEG;
         if (h->sps.colour_description_present_flag) {
+            if (s->avctx->colorspace != h->sps.colorspace)
+                needs_reinit = 1;
             s->avctx->color_primaries = h->sps.color_primaries;
             s->avctx->color_trc       = h->sps.color_trc;
             s->avctx->colorspace      = h->sps.colorspace;
         }
     }
 
-    ret = get_pixel_format(h);
-    if (ret < 0)
-        return ret;
-    else
-        pix_fmt = ret;
-    if (s->avctx->pix_fmt == PIX_FMT_NONE)
-        s->avctx->pix_fmt = pix_fmt;
-
     if (s->context_initialized &&
         (s->width  != s->avctx->width   ||
          s->height != s->avctx->height  ||
-         pix_fmt   != s->avctx->pix_fmt ||
          needs_reinit                   ||
          av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) {
 
@@ -2734,12 +2743,14 @@ static int decode_slice_header(H264Context *h, H264Context *h0)
             return AVERROR_INVALIDDATA;
         }
 
-        av_log(h->s.avctx, AV_LOG_INFO, "Reinit context to %dx%d, "
-               "pix_fmt: %d\n", s->width, s->height, pix_fmt);
-
         flush_change(h);
 
-        s->avctx->pix_fmt = pix_fmt;
+        if ((ret = get_pixel_format(h)) < 0)
+            return ret;
+        s->avctx->pix_fmt = ret;
+
+        av_log(h->s.avctx, AV_LOG_INFO, "Reinit context to %dx%d, "
+               "pix_fmt: %d\n", s->width, s->height, s->avctx->pix_fmt);
 
         if ((ret = h264_slice_header_init(h, 1)) < 0) {
             av_log(h->s.avctx, AV_LOG_ERROR,
@@ -2754,6 +2765,11 @@ static int decode_slice_header(H264Context *h, H264Context *h0)
                    "Cannot (re-)initialize context during parallel decoding.\n");
             return -1;
         }
+
+        if ((ret = get_pixel_format(h)) < 0)
+            return ret;
+        s->avctx->pix_fmt = ret;
+
         if ((ret = h264_slice_header_init(h, 0)) < 0) {
             av_log(h->s.avctx, AV_LOG_ERROR,
                    "h264_slice_header_init() failed\n");
@@ -2871,7 +2887,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0)
                         av_log_ask_for_sample(s->avctx, NULL);
                         s->picture_structure = last_pic_structure;
                         s->droppable         = last_pic_droppable;
-                        return AVERROR_INVALIDDATA;
+                        return AVERROR_PATCHWELCOME;
                     }
 
                     /* Take ownership of this buffer. Note that if another thread owned
@@ -2897,7 +2913,9 @@ static int decode_slice_header(H264Context *h, H264Context *h0)
             s->current_picture_ptr->frame_num = h->prev_frame_num;
             ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX, 0);
             ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX, 1);
-            ff_generate_sliding_window_mmcos(h);
+            if ((ret = ff_generate_sliding_window_mmcos(h, 1)) < 0 &&
+                s->avctx->err_recognition & AV_EF_EXPLODE)
+                return ret;
             if (ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index) < 0 &&
                 (s->avctx->err_recognition & AV_EF_EXPLODE))
                 return AVERROR_INVALIDDATA;
@@ -3075,7 +3093,15 @@ static int decode_slice_header(H264Context *h, H264Context *h0)
         }
     }
 
-    if (h->nal_ref_idc && ff_h264_decode_ref_pic_marking(h0, &s->gb) < 0 &&
+    // If frame-mt is enabled, only update mmco tables for the first slice
+    // in a field. Subsequent slices can temporarily clobber h->mmco_index
+    // or h->mmco, which will cause ref list mix-ups and decoding errors
+    // further down the line. This may break decoding if the first slice is
+    // corrupt, thus we only do this if frame-mt is enabled.
+    if (h->nal_ref_idc &&
+        ff_h264_decode_ref_pic_marking(h0, &s->gb,
+                            !(s->avctx->active_thread_type & FF_THREAD_FRAME) ||
+                            h0->current_slice == 0) < 0 &&
         (s->avctx->err_recognition & AV_EF_EXPLODE))
         return AVERROR_INVALIDDATA;