h264: Copy h264chroma dsp context to slice thread copies

[ffmpeg] / libavcodec / h264.c
diff --git a/libavcodec/h264.c b/libavcodec/h264.c

index f56885879264c64bb0d13cce34fb3871dcec6168..542070be938436f505216528c72e9777fe3293e2 100644 (file)
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -34,10 +34,12 @@
  #include "mpegvideo.h"
  #include "h264.h"
  #include "h264data.h"
+#include "h264chroma.h"
  #include "h264_mvpred.h"
  #include "golomb.h"
  #include "mathops.h"
  #include "rectangle.h"
+#include "svq3.h"
  #include "thread.h"
  #include "vdpau_internal.h"
  #include "libavutil/avassert.h"
@@ -60,9 +62,18 @@ static const uint8_t div6[QP_MAX_NUM + 1] = {
  };
  
  static const enum AVPixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = {
+#if CONFIG_H264_DXVA2_HWACCEL /* each hwaccel format is listed only when its CONFIG_H264_*_HWACCEL option is set */
      AV_PIX_FMT_DXVA2_VLD,
+#endif /* CONFIG_H264_DXVA2_HWACCEL */
+#if CONFIG_H264_VAAPI_HWACCEL
      AV_PIX_FMT_VAAPI_VLD,
+#endif /* CONFIG_H264_VAAPI_HWACCEL */
+#if CONFIG_H264_VDA_HWACCEL
      AV_PIX_FMT_VDA_VLD,
+#endif /* CONFIG_H264_VDA_HWACCEL */
+#if CONFIG_H264_VDPAU_HWACCEL
+    AV_PIX_FMT_VDPAU,
+#endif /* CONFIG_H264_VDPAU_HWACCEL */
      AV_PIX_FMT_YUVJ420P,
      AV_PIX_FMT_NONE
  };
@@ -290,10 +301,11 @@ static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n,
                                           int height, int y_offset, int list)
  {
      int raw_my        = h->mv_cache[list][scan8[n]][1];
-    int filter_height = (raw_my & 3) ? 2 : 0;
+    int filter_height_up   = (raw_my & 3) ? 2 : 0;
+    int filter_height_down = (raw_my & 3) ? 3 : 0;
      int full_my       = (raw_my >> 2) + y_offset;
-    int top           = full_my - filter_height;
-    int bottom        = full_my + filter_height + height;
+    int top           = full_my - filter_height_up;
+    int bottom        = full_my + filter_height_down + height;
  
      return FFMAX(abs(top), bottom);
  }
@@ -486,11 +498,11 @@ static av_always_inline void mc_dir_part(H264Context *h, Picture *pic,
          full_my                <          0 - extra_height ||
          full_mx + 16 /*FIXME*/ > pic_width  + extra_width  ||
          full_my + 16 /*FIXME*/ > pic_height + extra_height) {
-        s->dsp.emulated_edge_mc(s->edge_emu_buffer,
-                                src_y - (2 << pixel_shift) - 2 * h->mb_linesize,
-                                h->mb_linesize,
-                                16 + 5, 16 + 5 /*FIXME*/, full_mx - 2,
-                                full_my - 2, pic_width, pic_height);
+        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
+                                 src_y - (2 << pixel_shift) - 2 * h->mb_linesize,
+                                 h->mb_linesize,
+                                 16 + 5, 16 + 5 /*FIXME*/, full_mx - 2,
+                                 full_my - 2, pic_width, pic_height);
          src_y = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
          emu   = 1;
      }
@@ -505,12 +517,12 @@ static av_always_inline void mc_dir_part(H264Context *h, Picture *pic,
      if (chroma_idc == 3 /* yuv444 */) {
          src_cb = pic->f.data[1] + offset;
          if (emu) {
-            s->dsp.emulated_edge_mc(s->edge_emu_buffer,
-                                    src_cb - (2 << pixel_shift) - 2 * h->mb_linesize,
-                                    h->mb_linesize,
-                                    16 + 5, 16 + 5 /*FIXME*/,
-                                    full_mx - 2, full_my - 2,
-                                    pic_width, pic_height);
+            s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
+                                     src_cb - (2 << pixel_shift) - 2 * h->mb_linesize,
+                                     h->mb_linesize,
+                                     16 + 5, 16 + 5 /*FIXME*/,
+                                     full_mx - 2, full_my - 2,
+                                     pic_width, pic_height);
              src_cb = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
          }
          qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); // FIXME try variable height perhaps?
@@ -519,12 +531,12 @@ static av_always_inline void mc_dir_part(H264Context *h, Picture *pic,
  
          src_cr = pic->f.data[2] + offset;
          if (emu) {
-            s->dsp.emulated_edge_mc(s->edge_emu_buffer,
-                                    src_cr - (2 << pixel_shift) - 2 * h->mb_linesize,
-                                    h->mb_linesize,
-                                    16 + 5, 16 + 5 /*FIXME*/,
-                                    full_mx - 2, full_my - 2,
-                                    pic_width, pic_height);
+            s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
+                                     src_cr - (2 << pixel_shift) - 2 * h->mb_linesize,
+                                     h->mb_linesize,
+                                     16 + 5, 16 + 5 /*FIXME*/,
+                                     full_mx - 2, full_my - 2,
+                                     pic_width, pic_height);
              src_cr = s->edge_emu_buffer + (2 << pixel_shift) + 2 * h->mb_linesize;
          }
          qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); // FIXME try variable height perhaps?
@@ -546,9 +558,9 @@ static av_always_inline void mc_dir_part(H264Context *h, Picture *pic,
               (my >> ysh) * h->mb_uvlinesize;
  
      if (emu) {
-        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize,
-                                9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
-                                pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
+        s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize,
+                                 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
+                                 pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
          src_cb = s->edge_emu_buffer;
      }
      chroma_op(dest_cb, src_cb, h->mb_uvlinesize,
@@ -556,9 +568,9 @@ static av_always_inline void mc_dir_part(H264Context *h, Picture *pic,
                mx & 7, (my << (chroma_idc == 2 /* yuv422 */)) & 7);
  
      if (emu) {
-        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize,
-                                9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
-                                pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
+        s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize,
+                                 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
+                                 pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
          src_cr = s->edge_emu_buffer;
      }
      chroma_op(dest_cr, src_cr, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
@@ -735,15 +747,15 @@ static av_always_inline void prefetch_motion(H264Context *h, int list,
          int off       = (mx << pixel_shift) +
                          (my + (s->mb_x & 3) * 4) * h->mb_linesize +
                          (64 << pixel_shift);
-        s->dsp.prefetch(src[0] + off, s->linesize, 4);
+        s->vdsp.prefetch(src[0] + off, s->linesize, 4);
          if (chroma_idc == 3 /* yuv444 */) {
-            s->dsp.prefetch(src[1] + off, s->linesize, 4);
-            s->dsp.prefetch(src[2] + off, s->linesize, 4);
+            s->vdsp.prefetch(src[1] + off, s->linesize, 4);
+            s->vdsp.prefetch(src[2] + off, s->linesize, 4);
          } else {
              off = ((mx >> 1) << pixel_shift) +
                    ((my >> 1) + (s->mb_x & 7)) * s->uvlinesize +
                    (64 << pixel_shift);
-            s->dsp.prefetch(src[1] + off, src[2] - src[1], 2);
+            s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
          }
      }
  }
@@ -966,6 +978,8 @@ static av_cold void common_init(H264Context *h)
      s->codec_id = s->avctx->codec->id;
  
      ff_h264dsp_init(&h->h264dsp, 8, 1);
+    ff_h264chroma_init(&h->h264chroma, h->sps.bit_depth_chroma);
+    ff_h264qpel_init(&h->h264qpel, 8);
      ff_h264_pred_init(&h->hpc, s->codec_id, 8, 1);
  
      h->dequant_coeff_pps = -1;
@@ -973,6 +987,7 @@ static av_cold void common_init(H264Context *h)
  
      /* needed so that IDCT permutation is known early */
      ff_dsputil_init(&s->dsp, s->avctx);
+    ff_videodsp_init(&s->vdsp, 8);
  
      memset(h->pps.scaling_matrix4, 16, 6 * 16 * sizeof(uint8_t));
      memset(h->pps.scaling_matrix8, 16, 2 * 64 * sizeof(uint8_t));
@@ -1135,6 +1150,10 @@ static int decode_init_thread_copy(AVCodecContext *avctx)
      memcpy(&to->start_field, &from->start_field,                        \
             (char *)&to->end_field - (char *)&to->start_field)
  
+static int h264_slice_header_init(H264Context *, int);
+
+static int h264_set_parameter_from_sps(H264Context *h);
+
  static int decode_update_thread_context(AVCodecContext *dst,
                                          const AVCodecContext *src)
  {
@@ -1146,11 +1165,42 @@ static int decode_update_thread_context(AVCodecContext *dst,
      if (dst == src || !s1->context_initialized)
          return 0;
  
+    if (inited &&
+        (s->width      != s1->width      ||
+         s->height     != s1->height     ||
+         s->mb_width   != s1->mb_width   ||
+         s->mb_height  != s1->mb_height  ||
+         h->sps.bit_depth_luma    != h1->sps.bit_depth_luma    ||
+         h->sps.chroma_format_idc != h1->sps.chroma_format_idc ||
+         h->sps.colorspace        != h1->sps.colorspace)) {
+
+        av_freep(&h->bipred_scratchpad);
+
+        s->width     = s1->width;
+        s->height    = s1->height;
+        s->mb_height = s1->mb_height;
+        h->b_stride  = h1->b_stride;
+
+        if ((err = h264_slice_header_init(h, 1)) < 0) {
+            av_log(h->s.avctx, AV_LOG_ERROR, "h264_slice_header_init() failed");
+            return err;
+        }
+        h->context_reinitialized = 1;
+
+        /* update linesize on resize for h264. The h264 decoder doesn't
+         * necessarily call ff_MPV_frame_start in the new thread */
+        s->linesize   = s1->linesize;
+        s->uvlinesize = s1->uvlinesize;
+
+        /* copy block_offset since frame_start may not be called */
+        memcpy(h->block_offset, h1->block_offset, sizeof(h->block_offset));
+        h264_set_parameter_from_sps(h);
+    }
+
      err = ff_mpeg_update_thread_context(dst, src);
      if (err)
          return err;
  
-    // FIXME handle width/height changing
      if (!inited) {
          for (i = 0; i < MAX_SPS_COUNT; i++)
              av_freep(h->sps_buffers + i);
@@ -1216,7 +1266,7 @@ static int decode_update_thread_context(AVCodecContext *dst,
  
      // reference lists
      copy_fields(h, h1, ref_count, list_count);
-    copy_fields(h, h1, ref_list, intra_gb);
+    copy_fields(h, h1, ref2frm, intra_gb);
      copy_fields(h, h1, short_ref, cabac_init_idc);
  
      copy_picture_range(h->short_ref, h1->short_ref, 32, s, s1);
@@ -1368,7 +1418,6 @@ static void decode_postinit(H264Context *h, int setup_finished)
              cur->f.repeat_pict = 1;
              break;
          case SEI_PIC_STRUCT_FRAME_DOUBLING:
-            // Force progressive here, doubling interlaced frame is a bad idea.
              cur->f.repeat_pict = 2;
              break;
          case SEI_PIC_STRUCT_FRAME_TRIPLING:
@@ -1713,7 +1762,7 @@ static av_always_inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
      }
  }
  
-static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth,
+static av_always_inline int dctcoef_get(int16_t *mb, int high_bit_depth,
                                          int index)
  {
      if (high_bit_depth) {
@@ -1722,7 +1771,7 @@ static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth,
          return AV_RN16A(mb + index);
  }
  
-static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth,
+static av_always_inline void dctcoef_set(int16_t *mb, int high_bit_depth,
                                           int index, int value)
  {
      if (high_bit_depth) {
@@ -1741,8 +1790,8 @@ static av_always_inline void hl_decode_mb_predict_luma(H264Context *h,
                                                         uint8_t *dest_y, int p)
  {
      MpegEncContext *const s = &h->s;
-    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
-    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
+    void (*idct_add)(uint8_t *dst, int16_t *block, int stride);
+    void (*idct_dc_add)(uint8_t *dst, int16_t *block, int stride);
      int i;
      int qscale = p == 0 ? s->qscale : h->chroma_qp[p - 1];
      block_offset += 16 * p;
@@ -1858,7 +1907,7 @@ static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type,
                                                      uint8_t *dest_y, int p)
  {
      MpegEncContext *const s = &h->s;
-    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
+    void (*idct_add)(uint8_t *dst, int16_t *block, int stride);
      int i;
      block_offset += 16 * p;
      if (!IS_INTRA4x4(mb_type)) {
@@ -2095,15 +2144,9 @@ static void idr(H264Context *h)
  }
  
  /* forget old pics after a seek */
-static void flush_dpb(AVCodecContext *avctx)
+static void flush_change(H264Context *h)
  {
-    H264Context *h = avctx->priv_data;
      int i;
-    for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) {
-        if (h->delayed_pic[i])
-            h->delayed_pic[i]->f.reference = 0;
-        h->delayed_pic[i] = NULL;
-    }
      for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
          h->last_pocs[i] = INT_MIN;
      h->outputed_poc = h->next_outputed_poc = INT_MIN;
@@ -2112,7 +2155,26 @@ static void flush_dpb(AVCodecContext *avctx)
      if (h->s.current_picture_ptr)
          h->s.current_picture_ptr->f.reference = 0;
      h->s.first_field = 0;
+    memset(h->ref_list[0], 0, sizeof(h->ref_list[0]));
+    memset(h->ref_list[1], 0, sizeof(h->ref_list[1]));
+    memset(h->default_ref_list[0], 0, sizeof(h->default_ref_list[0]));
+    memset(h->default_ref_list[1], 0, sizeof(h->default_ref_list[1]));
      ff_h264_reset_sei(h);
+}
+
+/* Forget old pics after a seek: release every delayed picture, then reset the remaining state via flush_change() and ff_mpeg_flush(). */
+static void flush_dpb(AVCodecContext *avctx)
+{
+    H264Context *h = avctx->priv_data;
+    int i;
+
+    for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) { /* clear the reference flag of each delayed picture and empty its slot */
+        if (h->delayed_pic[i])
+            h->delayed_pic[i]->f.reference = 0;
+        h->delayed_pic[i] = NULL;
+    }
+
+    flush_change(h); /* shared reset that leaves delayed_pic[] alone; also called from decode_slice_header() on reinit */
      ff_mpeg_flush(avctx);
  }
  
@@ -2292,8 +2354,10 @@ static int field_end(H264Context *h, int in_setup)
  /**
   * Replicate H264 "master" context to thread contexts.
   */
-static void clone_slice(H264Context *dst, H264Context *src)
+static int clone_slice(H264Context *dst, H264Context *src)
  {
+    int ret;
+
      memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
      dst->s.current_picture_ptr = src->s.current_picture_ptr;
      dst->s.current_picture     = src->s.current_picture;
@@ -2301,6 +2365,13 @@ static void clone_slice(H264Context *dst, H264Context *src)
      dst->s.uvlinesize          = src->s.uvlinesize;
      dst->s.first_field         = src->s.first_field;
  
+    if (!dst->s.edge_emu_buffer &&
+        (ret = ff_mpv_frame_size_alloc(&dst->s, dst->s.linesize))) {
+        av_log(dst->s.avctx, AV_LOG_ERROR,
+               "Failed to allocate scratch buffers\n");
+        return ret;
+    }
+
      dst->prev_poc_msb          = src->prev_poc_msb;
      dst->prev_poc_lsb          = src->prev_poc_lsb;
      dst->prev_frame_num_offset = src->prev_frame_num_offset;
@@ -2310,10 +2381,11 @@ static void clone_slice(H264Context *dst, H264Context *src)
      memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
      memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
      memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
-    memcpy(dst->ref_list,         src->ref_list,         sizeof(dst->ref_list));
  
      memcpy(dst->dequant4_coeff,   src->dequant4_coeff,   sizeof(src->dequant4_coeff));
      memcpy(dst->dequant8_coeff,   src->dequant8_coeff,   sizeof(src->dequant8_coeff));
+
+    return 0;
  }
  
  /**
@@ -2376,10 +2448,13 @@ static int h264_set_parameter_from_sps(H264Context *h)
  
              ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma,
                              h->sps.chroma_format_idc);
+            ff_h264chroma_init(&h->h264chroma, h->sps.bit_depth_chroma);
+            ff_h264qpel_init(&h->h264qpel, h->sps.bit_depth_luma);
              ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma,
                                h->sps.chroma_format_idc);
              s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16;
              ff_dsputil_init(&s->dsp, s->avctx);
+            ff_videodsp_init(&s->vdsp, h->sps.bit_depth_luma);
          } else {
              av_log(s->avctx, AV_LOG_ERROR, "Unsupported bit depth: %d\n",
                     h->sps.bit_depth_luma);
@@ -2389,6 +2464,129 @@ static int h264_set_parameter_from_sps(H264Context *h)
      return 0;
  }
  
+/**
+ * Choose the output pixel format for the currently active SPS.
+ *
+ * The choice depends on the luma bit depth (8, 9 or 10) and on the chroma
+ * layout (CHROMA444, CHROMA422, otherwise the 4:2:0 formats). For 4:4:4 an
+ * RGB colorspace selects the planar GBR formats; for 8-bit content a JPEG
+ * color range selects the YUVJ variants. For 8-bit 4:2:0 the decision is
+ * delegated to the get_format() callback, offering the codec's own pix_fmts
+ * list if it has one, else the JPEG or regular hwaccel list.
+ *
+ * @param h decoder context; h->sps and h->s.avctx are read
+ * @return the selected pixel format, or AVERROR_INVALIDDATA (after logging)
+ *         when the luma bit depth is not 8, 9 or 10
+ */
+static enum PixelFormat get_pixel_format(H264Context *h)
+{
+    AVCodecContext *const avctx = h->s.avctx;
+    const int is_rgb  = avctx->colorspace  == AVCOL_SPC_RGB;
+    const int is_jpeg = avctx->color_range == AVCOL_RANGE_JPEG;
+
+    switch (h->sps.bit_depth_luma) {
+    case 8:
+        if (CHROMA444) {
+            if (is_rgb)
+                return AV_PIX_FMT_GBRP;
+            return is_jpeg ? AV_PIX_FMT_YUVJ444P : AV_PIX_FMT_YUV444P;
+        }
+        if (CHROMA422)
+            return is_jpeg ? AV_PIX_FMT_YUVJ422P : AV_PIX_FMT_YUV422P;
+        /* remaining case: let the user callback pick from a candidate list */
+        if (avctx->codec->pix_fmts)
+            return avctx->get_format(avctx, avctx->codec->pix_fmts);
+        if (is_jpeg)
+            return avctx->get_format(avctx, hwaccel_pixfmt_list_h264_jpeg_420);
+        return avctx->get_format(avctx, ff_hwaccel_pixfmt_list_420);
+    case 9:
+        if (CHROMA444)
+            return is_rgb ? AV_PIX_FMT_GBRP9 : AV_PIX_FMT_YUV444P9;
+        if (CHROMA422)
+            return AV_PIX_FMT_YUV422P9;
+        return AV_PIX_FMT_YUV420P9;
+    case 10:
+        if (CHROMA444)
+            return is_rgb ? AV_PIX_FMT_GBRP10 : AV_PIX_FMT_YUV444P10;
+        if (CHROMA422)
+            return AV_PIX_FMT_YUV422P10;
+        return AV_PIX_FMT_YUV420P10;
+    default:
+        av_log(avctx, AV_LOG_ERROR,
+               "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
+        return AVERROR_INVALIDDATA;
+    }
+}
+
+/**
+ * (Re)initialize the decoder context for the parameters of the active SPS.
+ *
+ * Publishes the dimensions, the sample aspect ratio and (when the SPS carries
+ * timing info) the time base to the AVCodecContext, looks up the hwaccel for
+ * the current pix_fmt, (re)initializes the MpegEncContext, allocates the
+ * H.264 tables and sets up the slice-thread contexts.
+ *
+ * @param h      decoder context to initialize
+ * @param reinit non-zero: free the existing tables and call
+ *               ff_MPV_common_frame_size_change(); zero: first-time setup
+ *               through ff_MPV_common_init()
+ * @return 0 on success, a negative value on failure
+ */
+static int h264_slice_header_init(H264Context *h, int reinit)
+{
+    MpegEncContext *const s  = &h->s;
+    int i, ret;
+
+    avcodec_set_dimensions(s->avctx, s->width, s->height);
+    s->avctx->sample_aspect_ratio = h->sps.sar;
+    av_assert0(s->avctx->sample_aspect_ratio.den);
+
+    if (h->sps.timing_info_present_flag) {
+        int64_t den = h->sps.time_scale;
+        /* the denominator is doubled for streams reporting an x264 build below 44 */
+        if (h->x264_build < 44U)
+            den *= 2;
+        av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
+                  h->sps.num_units_in_tick, den, 1 << 30);
+    }
+
+    s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
+
+    if (reinit) {
+        free_tables(h, 0);
+        if ((ret = ff_MPV_common_frame_size_change(s)) < 0) {
+            av_log(h->s.avctx, AV_LOG_ERROR, "ff_MPV_common_frame_size_change() failed.\n");
+            return ret;
+        }
+    } else {
+        if ((ret = ff_MPV_common_init(s)) < 0) {
+            av_log(h->s.avctx, AV_LOG_ERROR, "ff_MPV_common_init() failed.\n");
+            return ret;
+        }
+    }
+    s->first_field = 0;
+    h->prev_interlaced_frame = 1;
+
+    init_scan_tables(h);
+    if (ff_h264_alloc_tables(h) < 0) {
+        av_log(h->s.avctx, AV_LOG_ERROR,
+               "Could not allocate memory for h264\n");
+        return AVERROR(ENOMEM);
+    }
+
+    if (!HAVE_THREADS || !(s->avctx->active_thread_type & FF_THREAD_SLICE)) {
+        if ((ret = context_init(h)) < 0) {
+            av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
+            return ret;
+        }
+    } else {
+        /* slice threading: build one H264Context per additional slice context */
+        for (i = 1; i < s->slice_context_count; i++) {
+            H264Context *c;
+            c = h->thread_context[i] = av_malloc(sizeof(H264Context));
+            /* av_malloc() may return NULL; bail out before writing through it */
+            if (!c) {
+                av_log(h->s.avctx, AV_LOG_ERROR,
+                       "Could not allocate memory for h264\n");
+                return AVERROR(ENOMEM);
+            }
+            /* start from the matching MpegEncContext, zero the H.264-only tail */
+            memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
+            memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
+            c->h264dsp     = h->h264dsp;
+            c->h264qpel    = h->h264qpel;
+            c->h264chroma  = h->h264chroma;
+            c->sps         = h->sps;
+            c->pps         = h->pps;
+            c->pixel_shift = h->pixel_shift;
+            init_scan_tables(c);
+            clone_tables(c, h, i);
+        }
+
+        for (i = 0; i < s->slice_context_count; i++)
+            if ((ret = context_init(h->thread_context[i])) < 0) {
+                av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
+                return ret;
+            }
+    }
+
+    return 0;
+}
+
  /**
   * Decode a slice header.
   * This will also call ff_MPV_common_init() and frame_start() as needed.
@@ -2409,16 +2607,10 @@ static int decode_slice_header(H264Context *h, H264Context *h0)
      unsigned int slice_type, tmp, i, j;
      int default_ref_list_done = 0;
      int last_pic_structure, last_pic_droppable;
+    int needs_reinit = 0;
  
-    /* FIXME: 2tap qpel isn't implemented for high bit depth. */
-    if ((s->avctx->flags2 & CODEC_FLAG2_FAST) &&
-        !h->nal_ref_idc && !h->pixel_shift) {
-        s->me.qpel_put = s->dsp.put_2tap_qpel_pixels_tab;
-        s->me.qpel_avg = s->dsp.avg_2tap_qpel_pixels_tab;
-    } else {
-        s->me.qpel_put = s->dsp.put_h264_qpel_pixels_tab;
-        s->me.qpel_avg = s->dsp.avg_h264_qpel_pixels_tab;
-    }
+    s->me.qpel_put = h->h264qpel.put_h264_qpel_pixels_tab;
+    s->me.qpel_avg = h->h264qpel.avg_h264_qpel_pixels_tab;
  
      first_mb_in_slice = get_ue_golomb(&s->gb);
  
@@ -2483,9 +2675,16 @@ static int decode_slice_header(H264Context *h, H264Context *h0)
      }
  
      if (h->pps.sps_id != h->current_sps_id ||
+        h->context_reinitialized           ||
          h0->sps_buffers[h->pps.sps_id]->new) {
+        SPS *new_sps = h0->sps_buffers[h->pps.sps_id];
+
          h0->sps_buffers[h->pps.sps_id]->new = 0;
  
+        if (h->sps.chroma_format_idc != new_sps->chroma_format_idc ||
+            h->sps.bit_depth_luma    != new_sps->bit_depth_luma)
+            needs_reinit = 1;
+
          h->current_sps_id = h->pps.sps_id;
          h->sps            = *h0->sps_buffers[h->pps.sps_id];
  
@@ -2497,6 +2696,10 @@ static int decode_slice_header(H264Context *h, H264Context *h0)
      s->avctx->level   = h->sps.level_idc;
      s->avctx->refs    = h->sps.ref_frame_count;
  
+    if (s->mb_width  != h->sps.mb_width ||
+        s->mb_height != h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag))
+        needs_reinit = 1;
+
      s->mb_width  = h->sps.mb_width;
      s->mb_height = h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
  
@@ -2516,137 +2719,61 @@ static int decode_slice_header(H264Context *h, H264Context *h0)
          s->height = s->avctx->height;
      }
  
-    if (s->context_initialized &&
-        (s->width != s->avctx->width || s->height != s->avctx->height ||
-         av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) {
-        if (h != h0 || (HAVE_THREADS && h->s.avctx->active_thread_type & FF_THREAD_FRAME)) {
-            av_log_missing_feature(s->avctx,
-                                   "Width/height changing with threads", 0);
-            return AVERROR_PATCHWELCOME;   // width / height changed during parallelized decoding
+    if (h->sps.video_signal_type_present_flag) {
+        s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG
+                                                  : AVCOL_RANGE_MPEG;
+        if (h->sps.colour_description_present_flag) {
+            if (s->avctx->colorspace != h->sps.colorspace)
+                needs_reinit = 1;
+            s->avctx->color_primaries = h->sps.color_primaries;
+            s->avctx->color_trc       = h->sps.color_trc;
+            s->avctx->colorspace      = h->sps.colorspace;
          }
-        free_tables(h, 0);
-        flush_dpb(s->avctx);
-        ff_MPV_common_end(s);
      }
-    if (!s->context_initialized) {
-        if (h != h0) {
-            av_log(h->s.avctx, AV_LOG_ERROR,
-                   "Cannot (re-)initialize context during parallel decoding.\n");
-            return -1;
-        }
  
-        avcodec_set_dimensions(s->avctx, s->width, s->height);
-        s->avctx->sample_aspect_ratio = h->sps.sar;
-        av_assert0(s->avctx->sample_aspect_ratio.den);
-
-        if (h->sps.video_signal_type_present_flag) {
-            s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG
-                                                      : AVCOL_RANGE_MPEG;
-            if (h->sps.colour_description_present_flag) {
-                s->avctx->color_primaries = h->sps.color_primaries;
-                s->avctx->color_trc       = h->sps.color_trc;
-                s->avctx->colorspace      = h->sps.colorspace;
-            }
-        }
-
-        if (h->sps.timing_info_present_flag) {
-            int64_t den = h->sps.time_scale;
-            if (h->x264_build < 44U)
-                den *= 2;
-            av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
-                      h->sps.num_units_in_tick, den, 1 << 30);
-        }
+    if (s->context_initialized &&
+        (s->width  != s->avctx->width   ||
+         s->height != s->avctx->height  ||
+         needs_reinit                   ||
+         av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) {
  
-        switch (h->sps.bit_depth_luma) {
-        case 9:
-            if (CHROMA444) {
-                if (s->avctx->colorspace == AVCOL_SPC_RGB) {
-                    s->avctx->pix_fmt = AV_PIX_FMT_GBRP9;
-                } else
-                    s->avctx->pix_fmt = AV_PIX_FMT_YUV444P9;
-            } else if (CHROMA422)
-                s->avctx->pix_fmt = AV_PIX_FMT_YUV422P9;
-            else
-                s->avctx->pix_fmt = AV_PIX_FMT_YUV420P9;
-            break;
-        case 10:
-            if (CHROMA444) {
-                if (s->avctx->colorspace == AVCOL_SPC_RGB) {
-                    s->avctx->pix_fmt = AV_PIX_FMT_GBRP10;
-                } else
-                    s->avctx->pix_fmt = AV_PIX_FMT_YUV444P10;
-            } else if (CHROMA422)
-                s->avctx->pix_fmt = AV_PIX_FMT_YUV422P10;
-            else
-                s->avctx->pix_fmt = AV_PIX_FMT_YUV420P10;
-            break;
-        case 8:
-            if (CHROMA444) {
-                if (s->avctx->colorspace == AVCOL_SPC_RGB) {
-                    s->avctx->pix_fmt = AV_PIX_FMT_GBRP;
-                } else
-                    s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? AV_PIX_FMT_YUVJ444P
-                                                                                  : AV_PIX_FMT_YUV444P;
-            } else if (CHROMA422) {
-                s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? AV_PIX_FMT_YUVJ422P
-                                                                              : AV_PIX_FMT_YUV422P;
-            } else {
-                s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
-                                                         s->avctx->codec->pix_fmts ?
-                                                         s->avctx->codec->pix_fmts :
-                                                         s->avctx->color_range == AVCOL_RANGE_JPEG ?
-                                                         hwaccel_pixfmt_list_h264_jpeg_420 :
-                                                         ff_hwaccel_pixfmt_list_420);
-            }
-            break;
-        default:
-            av_log(s->avctx, AV_LOG_ERROR,
-                   "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
+        if (h != h0) {
+            av_log(s->avctx, AV_LOG_ERROR, "changing width/height on "
+                   "slice %d\n", h0->current_slice + 1);
              return AVERROR_INVALIDDATA;
          }
  
-        s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id,
-                                            s->avctx->pix_fmt);
+        flush_change(h);
  
-        if (ff_MPV_common_init(s) < 0) {
-            av_log(h->s.avctx, AV_LOG_ERROR, "ff_MPV_common_init() failed.\n");
-            return -1;
-        }
-        s->first_field = 0;
-        h->prev_interlaced_frame = 1;
+        if ((ret = get_pixel_format(h)) < 0)
+            return ret;
+        s->avctx->pix_fmt = ret;
  
-        init_scan_tables(h);
-        if (ff_h264_alloc_tables(h) < 0) {
+        av_log(h->s.avctx, AV_LOG_INFO, "Reinit context to %dx%d, "
+               "pix_fmt: %d\n", s->width, s->height, s->avctx->pix_fmt);
+
+        if ((ret = h264_slice_header_init(h, 1)) < 0) {
              av_log(h->s.avctx, AV_LOG_ERROR,
-                   "Could not allocate memory for h264\n");
-            return AVERROR(ENOMEM);
+                   "h264_slice_header_init() failed\n");
+            return ret;
+        }
+        h->context_reinitialized = 1;
+    }
+    if (!s->context_initialized) {
+        if (h != h0) {
+            av_log(h->s.avctx, AV_LOG_ERROR,
+                   "Cannot (re-)initialize context during parallel decoding.\n");
+            return -1;
          }
  
-        if (!HAVE_THREADS || !(s->avctx->active_thread_type & FF_THREAD_SLICE)) {
-            if (context_init(h) < 0) {
-                av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
-                return -1;
-            }
-        } else {
-            for (i = 1; i < s->slice_context_count; i++) {
-                H264Context *c;
-                c = h->thread_context[i] = av_malloc(sizeof(H264Context));
-                memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
-                memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
-                c->h264dsp     = h->h264dsp;
-                c->sps         = h->sps;
-                c->pps         = h->pps;
-                c->pixel_shift = h->pixel_shift;
-                init_scan_tables(c);
-                clone_tables(c, h, i);
-            }
+        if ((ret = get_pixel_format(h)) < 0)
+            return ret;
+        s->avctx->pix_fmt = ret;
  
-            for (i = 0; i < s->slice_context_count; i++)
-                if (context_init(h->thread_context[i]) < 0) {
-                    av_log(h->s.avctx, AV_LOG_ERROR,
-                           "context_init() failed.\n");
-                    return -1;
-                }
+        if ((ret = h264_slice_header_init(h, 0)) < 0) {
+            av_log(h->s.avctx, AV_LOG_ERROR,
+                   "h264_slice_header_init() failed\n");
+            return ret;
          }
      }
  
@@ -2760,7 +2887,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0)
                          av_log_ask_for_sample(s->avctx, NULL);
                          s->picture_structure = last_pic_structure;
                          s->droppable         = last_pic_droppable;
-                        return AVERROR_INVALIDDATA;
+                        return AVERROR_PATCHWELCOME;
                      }
  
                      /* Take ownership of this buffer. Note that if another thread owned
@@ -2786,7 +2913,9 @@ static int decode_slice_header(H264Context *h, H264Context *h0)
              s->current_picture_ptr->frame_num = h->prev_frame_num;
              ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX, 0);
              ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX, 1);
-            ff_generate_sliding_window_mmcos(h);
+            if ((ret = ff_generate_sliding_window_mmcos(h, 1)) < 0 &&
+                s->avctx->err_recognition & AV_EF_EXPLODE)
+                return ret;
              if (ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index) < 0 &&
                  (s->avctx->err_recognition & AV_EF_EXPLODE))
                  return AVERROR_INVALIDDATA;
@@ -2847,8 +2976,8 @@ static int decode_slice_header(H264Context *h, H264Context *h0)
              ff_release_unused_pictures(s, 0);
          }
      }
-    if (h != h0)
-        clone_slice(h, h0);
+    if (h != h0 && (ret = clone_slice(h, h0)) < 0)
+        return ret;
  
      s->current_picture_ptr->frame_num = h->frame_num; // FIXME frame_num cleanup
  
@@ -2964,7 +3093,15 @@ static int decode_slice_header(H264Context *h, H264Context *h0)
          }
      }
  
-    if (h->nal_ref_idc && ff_h264_decode_ref_pic_marking(h0, &s->gb) < 0 &&
+    // If frame-mt is enabled, only update mmco tables for the first slice
+    // in a field. Subsequent slices can temporarily clobber h->mmco_index
+    // or h->mmco, which will cause ref list mix-ups and decoding errors
+    // further down the line. This may break decoding if the first slice is
+    // corrupt, thus we only do this if frame-mt is enabled.
+    if (h->nal_ref_idc &&
+        ff_h264_decode_ref_pic_marking(h0, &s->gb,
+                            !(s->avctx->active_thread_type & FF_THREAD_FRAME) ||
+                            h0->current_slice == 0) < 0 &&
          (s->avctx->err_recognition & AV_EF_EXPLODE))
          return AVERROR_INVALIDDATA;
  
@@ -4086,6 +4223,7 @@ out:
              decode_postinit(h, 1);
  
          field_end(h, 0);
+        h->context_reinitialized = 0;
  
          if (!h->next_output_pic) {
              /* Wait for second field. */