vaapi_h265: Add support for AUD NAL units

[ffmpeg] / libavcodec / h264_slice.c
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c

index ec8a82c0a64a29a68e8ad09e5e63af093f29aeaf..5dd01d836e95b3f83d3fbe928dd82a00bf8259fb 100644 (file)
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -26,18 +26,22 @@
   */
  
  #include "libavutil/avassert.h"
+#include "libavutil/display.h"
  #include "libavutil/imgutils.h"
+#include "libavutil/stereo3d.h"
  #include "libavutil/timer.h"
  #include "internal.h"
  #include "cabac.h"
  #include "cabac_functions.h"
  #include "error_resilience.h"
+#include "golomb_legacy.h"
  #include "avcodec.h"
  #include "h264.h"
+#include "h264dec.h"
  #include "h264data.h"
  #include "h264chroma.h"
  #include "h264_mvpred.h"
-#include "golomb.h"
+#include "h264_ps.h"
  #include "mathops.h"
  #include "mpegutils.h"
  #include "rectangle.h"
@@ -374,6 +378,8 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
      h->avctx->coded_width   = h1->avctx->coded_width;
      h->avctx->width         = h1->avctx->width;
      h->avctx->height        = h1->avctx->height;
+    h->width_from_caller    = h1->width_from_caller;
+    h->height_from_caller   = h1->height_from_caller;
      h->coded_picture_number = h1->coded_picture_number;
      h->first_field          = h1->first_field;
      h->picture_structure    = h1->picture_structure;
@@ -397,6 +403,7 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
  
      h->enable_er       = h1->enable_er;
      h->workaround_bugs = h1->workaround_bugs;
+    h->x264_build      = h1->x264_build;
      h->droppable       = h1->droppable;
  
      // extradata/NAL handling
@@ -410,7 +417,6 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
      memcpy(h->delayed_pic, h1->delayed_pic, sizeof(h->delayed_pic));
      memcpy(h->last_pocs,   h1->last_pocs,   sizeof(h->last_pocs));
  
-    h->next_output_pic   = h1->next_output_pic;
      h->next_outputed_poc = h1->next_outputed_poc;
  
      memcpy(h->mmco, h1->mmco, sizeof(h->mmco));
@@ -468,6 +474,11 @@ static int h264_frame_start(H264Context *h)
  
      pic->f->pict_type = h->slice_ctx[0].slice_type;
  
+    pic->f->crop_left   = h->crop_left;
+    pic->f->crop_right  = h->crop_right;
+    pic->f->crop_top    = h->crop_top;
+    pic->f->crop_bottom = h->crop_bottom;
+
      if (CONFIG_ERROR_RESILIENCE && h->enable_er)
          ff_er_frame_start(&h->slice_ctx[0].er);
  
@@ -495,12 +506,13 @@ static int h264_frame_start(H264Context *h)
  
      h->cur_pic_ptr->field_poc[0] = h->cur_pic_ptr->field_poc[1] = INT_MAX;
  
-    h->next_output_pic = NULL;
-
      h->postpone_filter = 0;
  
      h->mb_aff_frame = h->ps.sps->mb_aff && (h->picture_structure == PICT_FRAME);
  
+    if (h->sei.unregistered.x264_build >= 0)
+        h->x264_build = h->sei.unregistered.x264_build;
+
      assert(h->cur_pic_ptr->long_ref == 0);
  
      return 0;
@@ -709,10 +721,11 @@ static void init_scan_tables(H264Context *h)
  static enum AVPixelFormat get_pixel_format(H264Context *h)
  {
  #define HWACCEL_MAX (CONFIG_H264_DXVA2_HWACCEL + \
-                     CONFIG_H264_D3D11VA_HWACCEL + \
+                     (CONFIG_H264_D3D11VA_HWACCEL * 2) + \
                       CONFIG_H264_VAAPI_HWACCEL + \
                       (CONFIG_H264_VDA_HWACCEL * 2) + \
-                     CONFIG_H264_VDPAU_HWACCEL)
+                     CONFIG_H264_VDPAU_HWACCEL     + \
+                     CONFIG_H264_CUVID_HWACCEL)
      enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
      const enum AVPixelFormat *choices = pix_fmts;
  
@@ -742,6 +755,9 @@ static enum AVPixelFormat get_pixel_format(H264Context *h)
      case 8:
  #if CONFIG_H264_VDPAU_HWACCEL
          *fmt++ = AV_PIX_FMT_VDPAU;
+#endif
+#if CONFIG_H264_CUVID_HWACCEL
+        *fmt++ = AV_PIX_FMT_CUDA;
  #endif
          if (CHROMA444(h)) {
              if (h->avctx->colorspace == AVCOL_SPC_RGB)
@@ -761,6 +777,7 @@ static enum AVPixelFormat get_pixel_format(H264Context *h)
  #endif
  #if CONFIG_H264_D3D11VA_HWACCEL
              *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
+            *fmt++ = AV_PIX_FMT_D3D11;
  #endif
  #if CONFIG_H264_VAAPI_HWACCEL
              *fmt++ = AV_PIX_FMT_VAAPI;
@@ -792,37 +809,37 @@ static enum AVPixelFormat get_pixel_format(H264Context *h)
  static int init_dimensions(H264Context *h)
  {
      SPS *sps = h->ps.sps;
-    int width  = h->width  - (sps->crop_right + sps->crop_left);
-    int height = h->height - (sps->crop_top   + sps->crop_bottom);
+    int cr = sps->crop_right;
+    int cl = sps->crop_left;
+    int ct = sps->crop_top;
+    int cb = sps->crop_bottom;
+    int width  = h->width  - (cr + cl);
+    int height = h->height - (ct + cb);
  
      /* handle container cropping */
-    if (FFALIGN(h->avctx->width,  16) == FFALIGN(width,  16) &&
-        FFALIGN(h->avctx->height, 16) == FFALIGN(height, 16)) {
-        width  = h->avctx->width;
-        height = h->avctx->height;
-    }
-
-    if (width <= 0 || height <= 0) {
-        av_log(h->avctx, AV_LOG_ERROR, "Invalid cropped dimensions: %dx%d.\n",
-               width, height);
-        if (h->avctx->err_recognition & AV_EF_EXPLODE)
-            return AVERROR_INVALIDDATA;
-
-        av_log(h->avctx, AV_LOG_WARNING, "Ignoring cropping information.\n");
-        sps->crop_bottom =
-        sps->crop_top    =
-        sps->crop_right  =
-        sps->crop_left   =
-        sps->crop        = 0;
-
-        width  = h->width;
-        height = h->height;
+    if (h->width_from_caller > 0 && h->height_from_caller > 0     &&
+        !sps->crop_top && !sps->crop_left                         &&
+        FFALIGN(h->width_from_caller,  16) == FFALIGN(width,  16) &&
+        FFALIGN(h->height_from_caller, 16) == FFALIGN(height, 16)) {
+        width  = h->width_from_caller;
+        height = h->height_from_caller;
+        cl = 0;
+        ct = 0;
+        cr = h->width - width;
+        cb = h->height - height;
+    } else {
+        h->width_from_caller  = 0;
+        h->height_from_caller = 0;
      }
  
      h->avctx->coded_width  = h->width;
      h->avctx->coded_height = h->height;
      h->avctx->width        = width;
      h->avctx->height       = height;
+    h->crop_right          = cr;
+    h->crop_left           = cl;
+    h->crop_top            = ct;
+    h->crop_bottom         = cb;
  
      return 0;
  }
@@ -838,7 +855,7 @@ static int h264_slice_header_init(H264Context *h)
  
      if (sps->timing_info_present_flag) {
          int64_t den = sps->time_scale;
-        if (h->sei.unregistered.x264_build < 44U)
+        if (h->x264_build < 44U)
              den *= 2;
          av_reduce(&h->avctx->framerate.den, &h->avctx->framerate.num,
                    sps->num_units_in_tick, den, 1 << 30);
@@ -922,11 +939,11 @@ static int h264_init_ps(H264Context *h, const H264SliceContext *sl)
      h->avctx->refs    = sps->ref_frame_count;
  
      if (h->mb_width  != sps->mb_width ||
-        h->mb_height != sps->mb_height * (2 - sps->frame_mbs_only_flag))
+        h->mb_height != sps->mb_height)
          needs_reinit = 1;
  
      h->mb_width  = sps->mb_width;
-    h->mb_height = sps->mb_height * (2 - sps->frame_mbs_only_flag);
+    h->mb_height = sps->mb_height;
      h->mb_num    = h->mb_width * h->mb_height;
      h->mb_stride = h->mb_width + 1;
  
@@ -984,6 +1001,302 @@ static int h264_init_ps(H264Context *h, const H264SliceContext *sl)
      return 0;
  }
  
+static int h264_export_frame_props(H264Context *h)
+{   /* Fill in the properties and side data of the current picture's frame
+     * (h->cur_pic_ptr->f) from the active SPS and the SEI state in h->sei:
+     * interlaced/repeat flags, field order, stereo 3D side data, display
+     * matrix, AFD, A53 captions and the preferred transfer characteristic.
+     *
+     * Returns 0 on success or AVERROR(ENOMEM) when a side data allocation
+     * fails. */
+    const SPS *sps = h->ps.sps;
+    H264Picture *cur = h->cur_pic_ptr;
+    /* Defaults; the picture timing / decoding process checks below may
+     * override them. */
+    cur->f->interlaced_frame = 0;
+    cur->f->repeat_pict      = 0;
+
+    /* Signal interlacing information externally. */
+    /* Prioritize picture timing SEI information, when it exists, over the
+     * decoding process that was used. */
+
+    if (sps->pic_struct_present_flag && h->sei.picture_timing.present) {
+        H264SEIPictureTiming *pt = &h->sei.picture_timing;
+        switch (pt->pic_struct) {
+        case H264_SEI_PIC_STRUCT_FRAME:
+            break; /* keeps the defaults assigned above */
+        case H264_SEI_PIC_STRUCT_TOP_FIELD:
+        case H264_SEI_PIC_STRUCT_BOTTOM_FIELD:
+            cur->f->interlaced_frame = 1;
+            break;
+        case H264_SEI_PIC_STRUCT_TOP_BOTTOM:
+        case H264_SEI_PIC_STRUCT_BOTTOM_TOP:
+            if (FIELD_OR_MBAFF_PICTURE(h))
+                cur->f->interlaced_frame = 1;
+            else
+                // try to flag soft telecine progressive: reuse the flag
+                // stored in h->prev_interlaced_frame by the previous call
+                cur->f->interlaced_frame = h->prev_interlaced_frame;
+            break;
+        case H264_SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
+        case H264_SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
+            /* Signal the possibility of telecined film externally
+             * (pic_struct 5,6). From these hints, let the applications
+             * decide if they apply deinterlacing. */
+            cur->f->repeat_pict = 1;
+            break;
+        case H264_SEI_PIC_STRUCT_FRAME_DOUBLING:
+            cur->f->repeat_pict = 2;
+            break;
+        case H264_SEI_PIC_STRUCT_FRAME_TRIPLING:
+            cur->f->repeat_pict = 4;
+            break;
+        }
+        /* For pic_struct values up to BOTTOM_TOP, a ct_type with either of
+         * its two low bits set overrides the flag with bit 1 of ct_type.
+         * NOTE(review): presumably bit 1 marks interlaced source material -
+         * confirm against the SEI parser. */
+        if ((pt->ct_type & 3) &&
+            pt->pic_struct <= H264_SEI_PIC_STRUCT_BOTTOM_TOP)
+            cur->f->interlaced_frame = (pt->ct_type & (1 << 1)) != 0;
+    } else {
+        /* Derive interlacing flag from used decoding process. */
+        cur->f->interlaced_frame = FIELD_OR_MBAFF_PICTURE(h);
+    }
+    h->prev_interlaced_frame = cur->f->interlaced_frame;
+
+    if (cur->field_poc[0] != cur->field_poc[1]) {
+        /* Derive top_field_first from field pocs. */
+        cur->f->top_field_first = cur->field_poc[0] < cur->field_poc[1];
+    } else {
+        if (cur->f->interlaced_frame ||
+            (sps->pic_struct_present_flag && h->sei.picture_timing.present)) {
+            /* Use picture timing SEI information. Even if it describes a
+             * past frame, that is better than nothing. */
+            if (h->sei.picture_timing.pic_struct == H264_SEI_PIC_STRUCT_TOP_BOTTOM ||
+                h->sei.picture_timing.pic_struct == H264_SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
+                cur->f->top_field_first = 1;
+            else
+                cur->f->top_field_first = 0;
+        } else {
+            /* Most likely progressive */
+            cur->f->top_field_first = 0;
+        }
+    }
+    /* Export the frame packing SEI as stereo 3D side data. Only arrangement
+     * types 0..6 combined with content interpretation type 1 or 2 are
+     * exported. */
+    if (h->sei.frame_packing.present &&
+        h->sei.frame_packing.arrangement_type >= 0 &&
+        h->sei.frame_packing.arrangement_type <= 6 &&
+        h->sei.frame_packing.content_interpretation_type > 0 &&
+        h->sei.frame_packing.content_interpretation_type < 3) {
+        H264SEIFramePacking *fp = &h->sei.frame_packing;
+        AVStereo3D *stereo = av_stereo3d_create_side_data(cur->f);
+        if (!stereo)
+            return AVERROR(ENOMEM);
+
+        switch (fp->arrangement_type) {
+        case 0:
+            stereo->type = AV_STEREO3D_CHECKERBOARD;
+            break;
+        case 1:
+            stereo->type = AV_STEREO3D_COLUMNS;
+            break;
+        case 2:
+            stereo->type = AV_STEREO3D_LINES;
+            break;
+        case 3:
+            if (fp->quincunx_subsampling)
+                stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
+            else
+                stereo->type = AV_STEREO3D_SIDEBYSIDE;
+            break;
+        case 4:
+            stereo->type = AV_STEREO3D_TOPBOTTOM;
+            break;
+        case 5:
+            stereo->type = AV_STEREO3D_FRAMESEQUENCE;
+            break;
+        case 6:
+            stereo->type = AV_STEREO3D_2D;
+            break;
+        }
+        /* Content interpretation type 2 is exported with the invert flag. */
+        if (fp->content_interpretation_type == 2)
+            stereo->flags = AV_STEREO3D_FLAG_INVERT;
+    }
+    /* Export a display matrix when the display orientation SEI asks for a
+     * rotation or a flip. */
+    if (h->sei.display_orientation.present &&
+        (h->sei.display_orientation.anticlockwise_rotation ||
+         h->sei.display_orientation.hflip ||
+         h->sei.display_orientation.vflip)) {
+        H264SEIDisplayOrientation *o = &h->sei.display_orientation;
+        double angle = o->anticlockwise_rotation * 360 / (double) (1 << 16);
+        AVFrameSideData *rotation = av_frame_new_side_data(cur->f,
+                                                           AV_FRAME_DATA_DISPLAYMATRIX,
+                                                           sizeof(int32_t) * 9);
+        if (!rotation)
+            return AVERROR(ENOMEM);
+        /* The side data holds 9 int32_t values; angle is the SEI rotation
+         * scaled by 360 / 2^16. */
+        av_display_rotation_set((int32_t *)rotation->data, angle);
+        av_display_matrix_flip((int32_t *)rotation->data,
+                               o->hflip, o->vflip);
+    }
+    /* Attach the active format description as a one-byte side data payload;
+     * the present flag is cleared once it has been exported. */
+    if (h->sei.afd.present) {
+        AVFrameSideData *sd = av_frame_new_side_data(cur->f, AV_FRAME_DATA_AFD,
+                                                     sizeof(uint8_t));
+        if (!sd)
+            return AVERROR(ENOMEM);
+
+        *sd->data = h->sei.afd.active_format_description;
+        h->sei.afd.present = 0;
+    }
+    /* Copy pending A53 caption bytes into side data, then free the SEI
+     * buffer and reset its size. On allocation failure the buffer is left
+     * untouched. */
+    if (h->sei.a53_caption.a53_caption) {
+        H264SEIA53Caption *a53 = &h->sei.a53_caption;
+        AVFrameSideData *sd = av_frame_new_side_data(cur->f,
+                                                     AV_FRAME_DATA_A53_CC,
+                                                     a53->a53_caption_size);
+        if (!sd)
+            return AVERROR(ENOMEM);
+
+        memcpy(sd->data, a53->a53_caption, a53->a53_caption_size);
+        av_freep(&a53->a53_caption);
+        a53->a53_caption_size = 0;
+    }
+    /* A preferred transfer characteristic from the alternative transfer SEI
+     * replaces color_trc on both the codec context and the frame, provided
+     * av_color_transfer_name() returns a name for it and it is not
+     * AVCOL_TRC_UNSPECIFIED. */
+    if (h->sei.alternative_transfer.present &&
+        av_color_transfer_name(h->sei.alternative_transfer.preferred_transfer_characteristics) &&
+        h->sei.alternative_transfer.preferred_transfer_characteristics != AVCOL_TRC_UNSPECIFIED) {
+        h->avctx->color_trc = cur->f->color_trc = h->sei.alternative_transfer.preferred_transfer_characteristics;
+    }
+
+    return 0;
+}
+
+static int h264_select_output_frame(H264Context *h)
+{   /* Append the current picture (h->cur_pic_ptr) to the delayed picture
+     * list, pick the queued picture with the lowest POC as output candidate
+     * and, when enough pictures are queued and the candidate is not out of
+     * order, reference it in h->output_frame. h->avctx->has_b_frames,
+     * h->last_pocs[], h->next_outputed_poc and h->mmco_reset are updated
+     * along the way.
+     *
+     * Returns 0 on success (also when no picture is output) or the negative
+     * error code from av_frame_ref(). */
+    const SPS *sps = h->ps.sps;
+    H264Picture *out = h->cur_pic_ptr;
+    H264Picture *cur = h->cur_pic_ptr;
+    int i, pics, out_of_order, out_idx;
+    int invalid = 0, cnt = 0;
+    int ret;
+    /* With a bitstream restriction in the SPS, or at normal or stricter
+     * compliance, has_b_frames is raised to at least num_reorder_frames. */
+    if (sps->bitstream_restriction_flag ||
+        h->avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
+        h->avctx->has_b_frames = FFMAX(h->avctx->has_b_frames, sps->num_reorder_frames);
+    }
+    /* Count the pictures already waiting in the NULL-terminated list. */
+    pics = 0;
+    while (h->delayed_pic[pics])
+        pics++;
+
+    assert(pics <= MAX_DELAYED_PIC_COUNT);
+    /* Append the current picture; a picture that is not referenced otherwise
+     * is tagged DELAYED_PIC_REF (the tag is removed again when it is taken
+     * out of the list below). */
+    h->delayed_pic[pics++] = cur;
+    if (cur->reference == 0)
+        cur->reference = DELAYED_PIC_REF;
+
+    /* Frame reordering. This code takes pictures from coding order and sorts
+     * them by their incremental POC value into display order. It supports POC
+     * gaps, MMCO reset codes and random resets.
+     * A "display group" can start either with an IDR frame (f.key_frame = 1),
+     * and/or can be closed down with an MMCO reset code. In sequences where
+     * there is no delay, we can't detect that (since the frame was already
+     * output to the user), so we also set h->mmco_reset to detect the MMCO
+     * reset code.
+     * FIXME: if we detect insufficient delays (as per h->avctx->has_b_frames),
+     * we increase the delay between input and output. All frames affected by
+     * the lag (e.g. those that should have been output before another frame
+     * that we already returned to the user) will be dropped. This is a bug
+     * that we will fix later.
+     *
+     * cnt counts the entries of last_pocs[] that are greater than the
+     * current POC.
+     * NOTE(review): invalid tests out->poc (still the current picture at this
+     * point) rather than last_pocs[i], so it ends up as either 0 or
+     * MAX_DELAYED_PIC_COUNT - confirm that this is intended. */
+    for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) {
+        cnt     += out->poc < h->last_pocs[i];
+        invalid += out->poc == INT_MIN;
+    }
+    if (!h->mmco_reset && !cur->f->key_frame &&
+        cnt + invalid == MAX_DELAYED_PIC_COUNT && cnt > 0) {
+        h->mmco_reset = 2; /* reset inferred from the POC history */
+        if (pics > 1)
+            h->delayed_pic[pics - 2]->mmco_reset = 2;
+    }
+    if (h->mmco_reset || cur->f->key_frame) {
+        for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
+            h->last_pocs[i] = INT_MIN;
+        cnt     = 0;
+        invalid = MAX_DELAYED_PIC_COUNT;
+    }
+    out     = h->delayed_pic[0]; /* candidate: lowest POC before a boundary */
+    out_idx = 0;
+    for (i = 1; i < MAX_DELAYED_PIC_COUNT &&
+                h->delayed_pic[i] &&
+                !h->delayed_pic[i - 1]->mmco_reset &&
+                !h->delayed_pic[i]->f->key_frame;
+         i++)
+        if (h->delayed_pic[i]->poc < out->poc) {
+            out     = h->delayed_pic[i];
+            out_idx = i;
+        }
+    if (h->avctx->has_b_frames == 0 &&
+        (h->delayed_pic[0]->f->key_frame || h->mmco_reset))
+        h->next_outputed_poc = INT_MIN;
+    out_of_order = !out->f->key_frame && !h->mmco_reset &&
+                   (out->poc < h->next_outputed_poc);
+    /* Delay adaptation: nothing to do when the SPS restriction is present
+     * and has_b_frames already covers num_reorder_frames; otherwise
+     * has_b_frames may be increased (it is never decreased here). */
+    if (sps->bitstream_restriction_flag &&
+        h->avctx->has_b_frames >= sps->num_reorder_frames) {
+    } else if (out_of_order && pics - 1 == h->avctx->has_b_frames &&
+               h->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT) {
+        if (invalid + cnt < MAX_DELAYED_PIC_COUNT) {
+            h->avctx->has_b_frames = FFMAX(h->avctx->has_b_frames, cnt);
+        }
+    } else if (!h->avctx->has_b_frames &&
+               ((h->next_outputed_poc != INT_MIN &&
+                 out->poc > h->next_outputed_poc + 2) ||
+                cur->f->pict_type == AV_PICTURE_TYPE_B)) {
+        h->avctx->has_b_frames++;
+    }
+    /* More pictures queued than the delay requires: drop the delay tag from
+     * the candidate and close the gap it leaves in the list. */
+    if (pics > h->avctx->has_b_frames) {
+        out->reference &= ~DELAYED_PIC_REF;
+        for (i = out_idx; h->delayed_pic[i]; i++)
+            h->delayed_pic[i] = h->delayed_pic[i + 1];
+    }
+    memmove(h->last_pocs, &h->last_pocs[1],
+            sizeof(*h->last_pocs) * (MAX_DELAYED_PIC_COUNT - 1));
+    h->last_pocs[MAX_DELAYED_PIC_COUNT - 1] = cur->poc; /* newest POC goes last */
+    if (!out_of_order && pics > h->avctx->has_b_frames) {
+        av_frame_unref(h->output_frame);
+        ret = av_frame_ref(h->output_frame, out->f);
+        if (ret < 0)
+            return ret;
+
+        if (out->recovered) {
+            // We have reached a recovery point and all frames after it in
+            // display order are "recovered".
+            h->frame_recovered |= FRAME_RECOVERED_SEI;
+        }
+        out->recovered |= !!(h->frame_recovered & FRAME_RECOVERED_SEI);
+        /* Pictures that are not recovered are dropped again unless
+         * AV_CODEC_FLAG_OUTPUT_CORRUPT is set, in which case they are output
+         * with AV_FRAME_FLAG_CORRUPT. */
+        if (!out->recovered) {
+            if (!(h->avctx->flags & AV_CODEC_FLAG_OUTPUT_CORRUPT))
+                av_frame_unref(h->output_frame);
+            else
+                h->output_frame->flags |= AV_FRAME_FLAG_CORRUPT;
+        }
+        /* Record the POC that was just output; the two INT_MIN assignments
+         * below restart the ordering check instead. A reset mark on the
+         * output picture is handed to the entry before it in the list. */
+        if (out->mmco_reset) {
+            if (out_idx > 0) {
+                h->next_outputed_poc                    = out->poc;
+                h->delayed_pic[out_idx - 1]->mmco_reset = out->mmco_reset;
+            } else {
+                h->next_outputed_poc = INT_MIN;
+            }
+        } else {
+            if (out_idx == 0 && pics > 1 && h->delayed_pic[0]->f->key_frame) {
+                h->next_outputed_poc = INT_MIN;
+            } else {
+                h->next_outputed_poc = out->poc;
+            }
+        }
+        h->mmco_reset = 0;
+    } else {
+        av_log(h->avctx, AV_LOG_DEBUG, "no picture\n");
+    }
+
+    return 0;
+}
+
  /* This function is called right after decoding the slice header for a first
   * slice in a field (or a frame). It decides whether we are decoding a new frame
   * or a second field in a pair and does the necessary setup.
@@ -1114,6 +1427,9 @@ static int h264_field_start(H264Context *h, const H264SliceContext *sl,
                  h->short_ref[0]->f->width == prev->f->width &&
                  h->short_ref[0]->f->height == prev->f->height &&
                  h->short_ref[0]->f->format == prev->f->format) {
+                ff_thread_await_progress(&prev->tf, INT_MAX, 0);
+                if (prev->field_picture)
+                    ff_thread_await_progress(&prev->tf, INT_MAX, 1);
                  av_image_copy(h->short_ref[0]->f->data,
                                h->short_ref[0]->f->linesize,
                                (const uint8_t **)prev->f->data,
@@ -1174,24 +1490,62 @@ static int h264_field_start(H264Context *h, const H264SliceContext *sl,
      h->nb_mmco = sl->nb_mmco;
      h->explicit_ref_marking = sl->explicit_ref_marking;
  
+    h->picture_idr = nal->type == H264_NAL_IDR_SLICE;
+
+    if (h->sei.recovery_point.recovery_frame_cnt >= 0 && h->recovery_frame < 0) {
+        h->recovery_frame = (h->poc.frame_num + h->sei.recovery_point.recovery_frame_cnt) &
+                            ((1 << h->ps.sps->log2_max_frame_num) - 1);
+    }
+
+    h->cur_pic_ptr->f->key_frame |= (nal->type == H264_NAL_IDR_SLICE) ||
+                                    (h->sei.recovery_point.recovery_frame_cnt >= 0);
+
+    if (nal->type == H264_NAL_IDR_SLICE || h->recovery_frame == h->poc.frame_num) {
+        h->recovery_frame         = -1;
+        h->cur_pic_ptr->recovered = 1;
+    }
+    // If we have an IDR, all frames after it in decoded order are
+    // "recovered".
+    if (nal->type == H264_NAL_IDR_SLICE)
+        h->frame_recovered |= FRAME_RECOVERED_IDR;
+    h->cur_pic_ptr->recovered |= !!(h->frame_recovered & FRAME_RECOVERED_IDR);
+
+    /* Set the frame properties/side data. Only done for the second field in
+     * field coded frames, since some SEI information is present for each field
+     * and is merged by the SEI parsing code. */
+    if (!FIELD_PICTURE(h) || !h->first_field) {
+        ret = h264_export_frame_props(h);
+        if (ret < 0)
+            return ret;
+
+        ret = h264_select_output_frame(h);
+        if (ret < 0)
+            return ret;
+    }
+
+    if (h->avctx->hwaccel) {
+        ret = h->avctx->hwaccel->start_frame(h->avctx, NULL, 0);
+        if (ret < 0)
+            return ret;
+    }
+
      return 0;
  }
  
-static int h264_slice_header_parse(const H264Context *h, H264SliceContext *sl,
-                                   const H2645NAL *nal)
+static int h264_slice_header_parse(H264SliceContext *sl, const H2645NAL *nal,
+                                   const H264ParamSets *ps, AVCodecContext *avctx)
  {
      const SPS *sps;
      const PPS *pps;
      int ret;
      unsigned int slice_type, tmp, i;
-    int field_pic_flag, bottom_field_flag;
-    int droppable, picture_structure;
+    int field_pic_flag, bottom_field_flag, picture_structure;
  
      sl->first_mb_addr = get_ue_golomb(&sl->gb);
  
      slice_type = get_ue_golomb_31(&sl->gb);
      if (slice_type > 9) {
-        av_log(h->avctx, AV_LOG_ERROR,
+        av_log(avctx, AV_LOG_ERROR,
                 "slice type %d too large at %d\n",
                 slice_type, sl->first_mb_addr);
          return AVERROR_INVALIDDATA;
@@ -1206,37 +1560,36 @@ static int h264_slice_header_parse(const H264Context *h, H264SliceContext *sl,
      sl->slice_type     = slice_type;
      sl->slice_type_nos = slice_type & 3;
  
-    if (nal->type  == NAL_IDR_SLICE &&
+    if (nal->type  == H264_NAL_IDR_SLICE &&
          sl->slice_type_nos != AV_PICTURE_TYPE_I) {
-        av_log(h->avctx, AV_LOG_ERROR, "A non-intra slice in an IDR NAL unit.\n");
+        av_log(avctx, AV_LOG_ERROR, "A non-intra slice in an IDR NAL unit.\n");
          return AVERROR_INVALIDDATA;
      }
  
      sl->pps_id = get_ue_golomb(&sl->gb);
      if (sl->pps_id >= MAX_PPS_COUNT) {
-        av_log(h->avctx, AV_LOG_ERROR, "pps_id %u out of range\n", sl->pps_id);
+        av_log(avctx, AV_LOG_ERROR, "pps_id %u out of range\n", sl->pps_id);
          return AVERROR_INVALIDDATA;
      }
-    if (!h->ps.pps_list[sl->pps_id]) {
-        av_log(h->avctx, AV_LOG_ERROR,
+    if (!ps->pps_list[sl->pps_id]) {
+        av_log(avctx, AV_LOG_ERROR,
                 "non-existing PPS %u referenced\n",
                 sl->pps_id);
          return AVERROR_INVALIDDATA;
      }
-    pps = (const PPS*)h->ps.pps_list[sl->pps_id]->data;
+    pps = (const PPS*)ps->pps_list[sl->pps_id]->data;
  
-    if (!h->ps.sps_list[pps->sps_id]) {
-        av_log(h->avctx, AV_LOG_ERROR,
+    if (!ps->sps_list[pps->sps_id]) {
+        av_log(avctx, AV_LOG_ERROR,
                 "non-existing SPS %u referenced\n", pps->sps_id);
          return AVERROR_INVALIDDATA;
      }
-    sps = (const SPS*)h->ps.sps_list[pps->sps_id]->data;
+    sps = (const SPS*)ps->sps_list[pps->sps_id]->data;
  
      sl->frame_num = get_bits(&sl->gb, sps->log2_max_frame_num);
  
      sl->mb_mbaff       = 0;
  
-    droppable = nal->ref_idc == 0;
      if (sps->frame_mbs_only_flag) {
          picture_structure = PICT_FRAME;
      } else {
@@ -1259,7 +1612,7 @@ static int h264_slice_header_parse(const H264Context *h, H264SliceContext *sl,
          sl->max_pic_num  = 1 << (sps->log2_max_frame_num + 1);
      }
  
-    if (nal->type == NAL_IDR_SLICE)
+    if (nal->type == H264_NAL_IDR_SLICE)
          get_ue_golomb(&sl->gb); /* idr_pic_id */
  
      if (sps->poc_type == 0) {
@@ -1276,6 +1629,7 @@ static int h264_slice_header_parse(const H264Context *h, H264SliceContext *sl,
              sl->delta_poc[1] = get_se_golomb(&sl->gb);
      }
  
+    sl->redundant_pic_count = 0;
      if (pps->redundant_pic_cnt_present)
          sl->redundant_pic_count = get_ue_golomb(&sl->gb);
  
@@ -1289,7 +1643,7 @@ static int h264_slice_header_parse(const H264Context *h, H264SliceContext *sl,
          return ret;
  
      if (sl->slice_type_nos != AV_PICTURE_TYPE_I) {
-       ret = ff_h264_decode_ref_pic_list_reordering(h, sl);
+       ret = ff_h264_decode_ref_pic_list_reordering(sl, avctx);
         if (ret < 0) {
             sl->ref_count[1] = sl->ref_count[0] = 0;
             return ret;
@@ -1309,15 +1663,15 @@ static int h264_slice_header_parse(const H264Context *h, H264SliceContext *sl,
  
      sl->explicit_ref_marking = 0;
      if (nal->ref_idc) {
-        ret = ff_h264_decode_ref_pic_marking(h, sl, &sl->gb);
-        if (ret < 0 && (h->avctx->err_recognition & AV_EF_EXPLODE))
+        ret = ff_h264_decode_ref_pic_marking(sl, &sl->gb, nal, avctx);
+        if (ret < 0 && (avctx->err_recognition & AV_EF_EXPLODE))
              return AVERROR_INVALIDDATA;
      }
  
      if (sl->slice_type_nos != AV_PICTURE_TYPE_I && pps->cabac) {
          tmp = get_ue_golomb_31(&sl->gb);
          if (tmp > 2) {
-            av_log(h->avctx, AV_LOG_ERROR, "cabac_init_idc %u overflow\n", tmp);
+            av_log(avctx, AV_LOG_ERROR, "cabac_init_idc %u overflow\n", tmp);
              return AVERROR_INVALIDDATA;
          }
          sl->cabac_init_idc = tmp;
@@ -1326,7 +1680,7 @@ static int h264_slice_header_parse(const H264Context *h, H264SliceContext *sl,
      sl->last_qscale_diff = 0;
      tmp = pps->init_qp + get_se_golomb(&sl->gb);
      if (tmp > 51 + 6 * (sps->bit_depth_luma - 8)) {
-        av_log(h->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
+        av_log(avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
          return AVERROR_INVALIDDATA;
      }
      sl->qscale       = tmp;
@@ -1345,7 +1699,7 @@ static int h264_slice_header_parse(const H264Context *h, H264SliceContext *sl,
      if (pps->deblocking_filter_parameters_present) {
          tmp = get_ue_golomb_31(&sl->gb);
          if (tmp > 2) {
-            av_log(h->avctx, AV_LOG_ERROR,
+            av_log(avctx, AV_LOG_ERROR,
                     "deblocking_filter_idc %u out of range\n", tmp);
              return AVERROR_INVALIDDATA;
          }
@@ -1360,7 +1714,7 @@ static int h264_slice_header_parse(const H264Context *h, H264SliceContext *sl,
                  sl->slice_alpha_c0_offset < -12 ||
                  sl->slice_beta_offset >  12     ||
                  sl->slice_beta_offset < -12) {
-                av_log(h->avctx, AV_LOG_ERROR,
+                av_log(avctx, AV_LOG_ERROR,
                         "deblocking filter parameters %d %d out of range\n",
                         sl->slice_alpha_c0_offset, sl->slice_beta_offset);
                  return AVERROR_INVALIDDATA;
@@ -1371,46 +1725,13 @@ static int h264_slice_header_parse(const H264Context *h, H264SliceContext *sl,
      return 0;
  }
  
-/**
- * Decode a slice header.
- * This will (re)initialize the decoder and call h264_frame_start() as needed.
- *
- * @param h h264context
- *
- * @return 0 if okay, <0 if an error occurred
- */
-int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl,
-                                const H2645NAL *nal)
+/* do all the per-slice initialization needed before we can start decoding the
+ * actual MBs */
+static int h264_slice_init(H264Context *h, H264SliceContext *sl,
+                           const H2645NAL *nal)
  {
      int i, j, ret = 0;
  
-    ret = h264_slice_header_parse(h, sl, nal);
-    if (ret < 0)
-        return ret;
-
-    if (!h->setup_finished) {
-        if (sl->first_mb_addr == 0) { // FIXME better field boundary detection
-            if (h->current_slice && h->cur_pic_ptr && FIELD_PICTURE(h)) {
-                ff_h264_field_end(h, sl, 1);
-            }
-
-            h->current_slice = 0;
-            if (!h->first_field) {
-                if (h->cur_pic_ptr && !h->droppable) {
-                    ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX,
-                                              h->picture_structure == PICT_BOTTOM_FIELD);
-                }
-                h->cur_pic_ptr = NULL;
-            }
-        }
-
-        if (h->current_slice == 0) {
-            ret = h264_field_start(h, sl, nal);
-            if (ret < 0)
-                return ret;
-        }
-    }
-
      if (h->current_slice > 0) {
          if (h->ps.pps != (const PPS*)h->ps.pps_list[sl->pps_id]->data) {
              av_log(h->avctx, AV_LOG_ERROR, "PPS changed between slices\n");
@@ -1431,6 +1752,11 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl,
          }
      }
  
+    if (h->picture_idr && nal->type != H264_NAL_IDR_SLICE) {
+        av_log(h->avctx, AV_LOG_ERROR, "Invalid mix of IDR and non-IDR slices\n");
+        return AVERROR_INVALIDDATA;
+    }
+
      assert(h->mb_num == h->mb_width * h->mb_height);
      if (sl->first_mb_addr << FIELD_OR_MBAFF_PICTURE(h) >= h->mb_num ||
          sl->first_mb_addr >= h->mb_num) {
@@ -1533,7 +1859,7 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl,
                 sl->mb_y * h->mb_width + sl->mb_x,
                 av_get_picture_type_char(sl->slice_type),
                 sl->slice_type_fixed ? " fix" : "",
-               nal->type == NAL_IDR_SLICE ? " IDR" : "",
+               nal->type == H264_NAL_IDR_SLICE ? " IDR" : "",
                 h->poc.frame_num,
                 h->cur_pic_ptr->field_poc[0],
                 h->cur_pic_ptr->field_poc[1],
@@ -1549,6 +1875,75 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl,
      return 0;
  }
  
+int ff_h264_queue_decode_slice(H264Context *h, const H2645NAL *nal)
+{   /* Parse the header of the slice carried by nal into the next free slice
+     * context, start a new field first when the slice begins one, run the
+     * per-slice initialization and finally count the slice as queued unless
+     * avctx->skip_frame discards it.
+     *
+     * Returns 0 on success (including for slices that are discarded) or a
+     * negative error code from header parsing, field start or slice
+     * initialization. */
+    H264SliceContext *sl = h->slice_ctx + h->nb_slice_ctx_queued;
+    int ret;
+    /* The slice context gets its own copy of the NAL's bit reader. */
+    sl->gb = nal->gb;
+
+    ret = h264_slice_header_parse(sl, nal, &h->ps, h->avctx);
+    if (ret < 0)
+        return ret;
+
+    // discard redundant pictures
+    if (sl->redundant_pic_count > 0)
+        return 0;
+
+    if (!h->setup_finished) {
+        if (sl->first_mb_addr == 0) { // FIXME better field boundary detection
+            // this slice starts a new field
+            // first decode any pending queued slices
+            if (h->nb_slice_ctx_queued) {
+                H264SliceContext tmp_ctx;
+                /* A failure while decoding the queued slices is returned
+                 * only when AV_EF_EXPLODE is set. */
+                ret = ff_h264_execute_decode_slices(h);
+                if (ret < 0 && (h->avctx->err_recognition & AV_EF_EXPLODE))
+                    return ret;
+                /* Swap the contents of slice context 0 and this one, then
+                 * continue with slot 0. */
+                memcpy(&tmp_ctx, h->slice_ctx, sizeof(tmp_ctx));
+                memcpy(h->slice_ctx, sl, sizeof(tmp_ctx));
+                memcpy(sl, &tmp_ctx, sizeof(tmp_ctx));
+                sl = h->slice_ctx;
+            }
+            /* End the field that was started by an earlier call, if any. */
+            if (h->field_started)
+                ff_h264_field_end(h, sl, 1);
+
+            h->current_slice = 0;
+            if (!h->first_field) {
+                if (h->cur_pic_ptr && !h->droppable) {
+                    ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX,
+                                              h->picture_structure == PICT_BOTTOM_FIELD);
+                }
+                h->cur_pic_ptr = NULL;
+            }
+        }
+        /* No slice of the current field has been set up yet: do the
+         * per-field setup now. */
+        if (h->current_slice == 0) {
+            ret = h264_field_start(h, sl, nal);
+            if (ret < 0)
+                return ret;
+            h->field_started = 1;
+        }
+    }
+
+    ret = h264_slice_init(h, sl, nal);
+    if (ret < 0)
+        return ret;
+    /* Count the slice as queued only if avctx->skip_frame keeps it. Depending
+     * on the level, non-reference slices, B slices, slices of non-key frames
+     * or all slices are left out; the counter then stays put, so the next
+     * call starts from the same slice context index. */
+    if ((h->avctx->skip_frame < AVDISCARD_NONREF || nal->ref_idc) &&
+        (h->avctx->skip_frame < AVDISCARD_BIDIR  ||
+         sl->slice_type_nos != AV_PICTURE_TYPE_B) &&
+        (h->avctx->skip_frame < AVDISCARD_NONKEY ||
+         h->cur_pic_ptr->f->key_frame) &&
+        h->avctx->skip_frame < AVDISCARD_ALL) {
+        h->nb_slice_ctx_queued++;
+    }
+
+    return 0;
+}
+
  int ff_h264_get_slice_type(const H264SliceContext *sl)
  {
      switch (sl->slice_type) {
@@ -1582,12 +1977,12 @@ static av_always_inline void fill_filter_caches_inter(const H264Context *h,
          if (USES_LIST(top_type, list)) {
              const int b_xy  = h->mb2b_xy[top_xy] + 3 * b_stride;
              const int b8_xy = 4 * top_xy + 2;
-            int (*ref2frm)[64] = h->ref2frm[h->slice_table[top_xy] & (MAX_SLICES - 1)][0] + (MB_MBAFF(sl) ? 20 : 2);
+            const int *ref2frm = &h->ref2frm[h->slice_table[top_xy] & (MAX_SLICES - 1)][list][(MB_MBAFF(sl) ? 20 : 2)];
              AV_COPY128(mv_dst - 1 * 8, h->cur_pic.motion_val[list][b_xy + 0]);
              ref_cache[0 - 1 * 8] =
-            ref_cache[1 - 1 * 8] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 0]];
+            ref_cache[1 - 1 * 8] = ref2frm[h->cur_pic.ref_index[list][b8_xy + 0]];
              ref_cache[2 - 1 * 8] =
-            ref_cache[3 - 1 * 8] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 1]];
+            ref_cache[3 - 1 * 8] = ref2frm[h->cur_pic.ref_index[list][b8_xy + 1]];
          } else {
              AV_ZERO128(mv_dst - 1 * 8);
              AV_WN32A(&ref_cache[0 - 1 * 8], ((LIST_NOT_USED) & 0xFF) * 0x01010101u);
@@ -1597,15 +1992,15 @@ static av_always_inline void fill_filter_caches_inter(const H264Context *h,
              if (USES_LIST(left_type[LTOP], list)) {
                  const int b_xy  = h->mb2b_xy[left_xy[LTOP]] + 3;
                  const int b8_xy = 4 * left_xy[LTOP] + 1;
-                int (*ref2frm)[64] = h->ref2frm[h->slice_table[left_xy[LTOP]] & (MAX_SLICES - 1)][0] + (MB_MBAFF(sl) ? 20 : 2);
+                const int *ref2frm = &h->ref2frm[h->slice_table[left_xy[LTOP]] & (MAX_SLICES - 1)][list][(MB_MBAFF(sl) ? 20 : 2)];
                  AV_COPY32(mv_dst - 1 +  0, h->cur_pic.motion_val[list][b_xy + b_stride * 0]);
                  AV_COPY32(mv_dst - 1 +  8, h->cur_pic.motion_val[list][b_xy + b_stride * 1]);
                  AV_COPY32(mv_dst - 1 + 16, h->cur_pic.motion_val[list][b_xy + b_stride * 2]);
                  AV_COPY32(mv_dst - 1 + 24, h->cur_pic.motion_val[list][b_xy + b_stride * 3]);
                  ref_cache[-1 +  0] =
-                ref_cache[-1 +  8] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 2 * 0]];
+                ref_cache[-1 +  8] = ref2frm[h->cur_pic.ref_index[list][b8_xy + 2 * 0]];
                  ref_cache[-1 + 16] =
-                ref_cache[-1 + 24] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 2 * 1]];
+                ref_cache[-1 + 24] = ref2frm[h->cur_pic.ref_index[list][b8_xy + 2 * 1]];
              } else {
                  AV_ZERO32(mv_dst - 1 +  0);
                  AV_ZERO32(mv_dst - 1 +  8);
@@ -1630,9 +2025,9 @@ static av_always_inline void fill_filter_caches_inter(const H264Context *h,
  
      {
          int8_t *ref = &h->cur_pic.ref_index[list][4 * mb_xy];
-        int (*ref2frm)[64] = h->ref2frm[sl->slice_num & (MAX_SLICES - 1)][0] + (MB_MBAFF(sl) ? 20 : 2);
-        uint32_t ref01 = (pack16to32(ref2frm[list][ref[0]], ref2frm[list][ref[1]]) & 0x00FF00FF) * 0x0101;
-        uint32_t ref23 = (pack16to32(ref2frm[list][ref[2]], ref2frm[list][ref[3]]) & 0x00FF00FF) * 0x0101;
+        const int *ref2frm = &h->ref2frm[sl->slice_num & (MAX_SLICES - 1)][list][(MB_MBAFF(sl) ? 20 : 2)];
+        uint32_t ref01 = (pack16to32(ref2frm[ref[0]], ref2frm[ref[1]]) & 0x00FF00FF) * 0x0101;
+        uint32_t ref23 = (pack16to32(ref2frm[ref[2]], ref2frm[ref[3]]) & 0x00FF00FF) * 0x0101;
          AV_WN32A(&ref_cache[0 * 8], ref01);
          AV_WN32A(&ref_cache[1 * 8], ref01);
          AV_WN32A(&ref_cache[2 * 8], ref23);
@@ -1943,7 +2338,6 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg)
          sl->deblocking_filter = 0;
  
      sl->is_complex = FRAME_MBAFF(h) || h->picture_structure != PICT_FRAME ||
-                     avctx->codec_id != AV_CODEC_ID_H264 ||
                       (CONFIG_GRAY && (h->flags & AV_CODEC_FLAG_GRAY));
  
      if (h->ps.pps->cabac) {
@@ -2116,25 +2510,26 @@ finish:
   * Call decode_slice() for each context.
   *
   * @param h h264 master context
- * @param context_count number of contexts to execute
   */
-int ff_h264_execute_decode_slices(H264Context *h, unsigned context_count)
+int ff_h264_execute_decode_slices(H264Context *h)
  {
      AVCodecContext *const avctx = h->avctx;
      H264SliceContext *sl;
+    int context_count = h->nb_slice_ctx_queued;
+    int ret = 0;
      int i, j;
  
-    if (h->avctx->hwaccel)
+    if (h->avctx->hwaccel || context_count < 1)
          return 0;
      if (context_count == 1) {
-        int ret;
  
          h->slice_ctx[0].next_slice_idx = h->mb_width * h->mb_height;
          h->postpone_filter = 0;
  
          ret = decode_slice(avctx, &h->slice_ctx[0]);
          h->mb_y = h->slice_ctx[0].mb_y;
-        return ret;
+        if (ret < 0)
+            goto finish;
      } else {
          for (i = 0; i < context_count; i++) {
              int next_slice_idx = h->mb_width * h->mb_height;
@@ -2184,5 +2579,7 @@ int ff_h264_execute_decode_slices(H264Context *h, unsigned context_count)
          }
      }
  
-    return 0;
+finish:
+    h->nb_slice_ctx_queued = 0;
+    return ret;
  }