hevc: do not store the transform inter_split flag in the context

[ffmpeg] / libavcodec / hevc.c
diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c

index 7481023cc35def971e8f3007480b2c2721849c03..c2c4006fdebf8a5114337e360da0544fb7e62064 100644 (file)
--- a/libavcodec/hevc.c
+++ b/libavcodec/hevc.c
@@ -25,15 +25,16 @@
  
  #include "libavutil/attributes.h"
  #include "libavutil/common.h"
+#include "libavutil/display.h"
  #include "libavutil/internal.h"
  #include "libavutil/md5.h"
  #include "libavutil/opt.h"
  #include "libavutil/pixdesc.h"
  #include "libavutil/stereo3d.h"
  
+#include "bswapdsp.h"
  #include "bytestream.h"
  #include "cabac_functions.h"
-#include "dsputil.h"
  #include "golomb.h"
  #include "hevc.h"
  
@@ -165,7 +166,6 @@ static void pic_arrays_free(HEVCContext *s)
  {
      av_freep(&s->sao);
      av_freep(&s->deblock);
-    av_freep(&s->split_cu_flag);
  
      av_freep(&s->skip_flag);
      av_freep(&s->tab_ct_depth);
@@ -191,7 +191,6 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
      int log2_min_cb_size = sps->log2_min_cb_size;
      int width            = sps->width;
      int height           = sps->height;
-    int pic_size         = width * height;
      int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
                             ((height >> log2_min_cb_size) + 1);
      int ctb_count        = sps->ctb_width * sps->ctb_height;
@@ -202,8 +201,7 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
  
      s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
      s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
-    s->split_cu_flag = av_malloc(pic_size);
-    if (!s->sao || !s->deblock || !s->split_cu_flag)
+    if (!s->sao || !s->deblock)
          goto fail;
  
      s->skip_flag    = av_malloc(pic_size_in_ctb);
@@ -212,7 +210,7 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
          goto fail;
  
      s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
-    s->tab_ipm  = av_malloc(min_pu_size);
+    s->tab_ipm  = av_mallocz(min_pu_size);
      s->is_pcm   = av_malloc(min_pu_size);
      if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
          goto fail;
@@ -338,7 +336,7 @@ static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
      const HEVCSPS *sps = s->sps;
      int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
      int prev_delta_msb = 0;
-    int nb_sps = 0, nb_sh;
+    unsigned int nb_sps = 0, nb_sh;
      int i;
  
      rps->nb_refs = 0;
@@ -388,7 +386,7 @@ static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
  static int set_sps(HEVCContext *s, const HEVCSPS *sps)
  {
      int ret;
-    int num = 0, den = 0;
+    unsigned int num = 0, den = 0;
  
      pic_arrays_free(s);
      ret = pic_arrays_init(s, sps);
@@ -400,9 +398,10 @@ static int set_sps(HEVCContext *s, const HEVCSPS *sps)
      s->avctx->width               = sps->output_width;
      s->avctx->height              = sps->output_height;
      s->avctx->pix_fmt             = sps->pix_fmt;
-    s->avctx->sample_aspect_ratio = sps->vui.sar;
      s->avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
  
+    ff_set_sar(s->avctx, sps->vui.sar);
+
      if (sps->vui.video_signal_type_present_flag)
          s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
                                                                 : AVCOL_RANGE_MPEG;
@@ -468,7 +467,7 @@ static int hls_slice_header(HEVCContext *s)
          if (IS_IDR(s))
              ff_hevc_clear_refs(s);
      }
-    if (s->nal_unit_type >= 16 && s->nal_unit_type <= 23)
+    if (IS_IRAP(s))
          sh->no_output_of_prior_pics_flag = get_bits1(gb);
  
      sh->pps_id = get_ue_golomb_long(gb);
@@ -544,6 +543,8 @@ static int hls_slice_header(HEVCContext *s)
              return AVERROR_INVALIDDATA;
          }
  
+        // when flag is not present, picture is inferred to be output
+        sh->pic_output_flag = 1;
          if (s->pps->output_flag_present_flag)
              sh->pic_output_flag = get_bits1(gb);
  
@@ -703,6 +704,7 @@ static int hls_slice_header(HEVCContext *s)
          }
  
          sh->slice_qp_delta = get_se_golomb(gb);
+
          if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
              sh->slice_cb_qp_offset = get_se_golomb(gb);
              sh->slice_cr_qp_offset = get_se_golomb(gb);
@@ -759,20 +761,35 @@ static int hls_slice_header(HEVCContext *s)
      }
  
      if (s->pps->slice_header_extension_present_flag) {
-        int length = get_ue_golomb_long(gb);
+        unsigned int length = get_ue_golomb_long(gb);
          for (i = 0; i < length; i++)
              skip_bits(gb, 8);  // slice_header_extension_data_byte
      }
  
      // Inferred parameters
-    sh->slice_qp          = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
+    sh->slice_qp = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
+    if (sh->slice_qp > 51 ||
+        sh->slice_qp < -s->sps->qp_bd_offset) {
+        av_log(s->avctx, AV_LOG_ERROR,
+               "The slice_qp %d is outside the valid range "
+               "[%d, 51].\n",
+               sh->slice_qp,
+               -s->sps->qp_bd_offset);
+        return AVERROR_INVALIDDATA;
+    }
+
      sh->slice_ctb_addr_rs = sh->slice_segment_addr;
  
+    if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
+        av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
      s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
  
      if (!s->pps->cu_qp_delta_enabled_flag)
-        s->HEVClc.qp_y = ((s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset) %
-                          (52 + s->sps->qp_bd_offset)) - s->sps->qp_bd_offset;
+        s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset,
+                                52 + s->sps->qp_bd_offset) - s->sps->qp_bd_offset;
  
      s->slice_initialized = 1;
  
@@ -1213,10 +1230,10 @@ static void hls_residual_coding(HEVCContext *s, int x0, int y0,
      }
  }
  
-static void hls_transform_unit(HEVCContext *s, int x0, int y0,
-                               int xBase, int yBase, int cb_xBase, int cb_yBase,
-                               int log2_cb_size, int log2_trafo_size,
-                               int trafo_depth, int blk_idx)
+static int hls_transform_unit(HEVCContext *s, int x0, int y0,
+                              int xBase, int yBase, int cb_xBase, int cb_yBase,
+                              int log2_cb_size, int log2_trafo_size,
+                              int trafo_depth, int blk_idx)
  {
      HEVCLocalContext *lc = &s->HEVClc;
  
@@ -1224,18 +1241,18 @@ static void hls_transform_unit(HEVCContext *s, int x0, int y0,
          int trafo_size = 1 << log2_trafo_size;
          ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
  
-        s->hpc.intra_pred(s, x0, y0, log2_trafo_size, 0);
+        s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
          if (log2_trafo_size > 2) {
              trafo_size = trafo_size << (s->sps->hshift[1] - 1);
              ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
-            s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 1);
-            s->hpc.intra_pred(s, x0, y0, log2_trafo_size - 1, 2);
+            s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1);
+            s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2);
          } else if (blk_idx == 3) {
              trafo_size = trafo_size << s->sps->hshift[1];
              ff_hevc_set_neighbour_available(s, xBase, yBase,
                                              trafo_size, trafo_size);
-            s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 1);
-            s->hpc.intra_pred(s, xBase, yBase, log2_trafo_size, 2);
+            s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
+            s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
          }
      }
  
@@ -1251,6 +1268,18 @@ static void hls_transform_unit(HEVCContext *s, int x0, int y0,
                  if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
                      lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
              lc->tu.is_cu_qp_delta_coded = 1;
+
+            if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
+                lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
+                av_log(s->avctx, AV_LOG_ERROR,
+                       "The cu_qp_delta %d is outside the valid range "
+                       "[%d, %d].\n",
+                       lc->tu.cu_qp_delta,
+                       -(26 + s->sps->qp_bd_offset / 2),
+                        (25 + s->sps->qp_bd_offset / 2));
+                return AVERROR_INVALIDDATA;
+            }
+
              ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
          }
  
@@ -1286,6 +1315,7 @@ static void hls_transform_unit(HEVCContext *s, int x0, int y0,
                  hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
          }
      }
+    return 0;
  }
  
  static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
@@ -1303,13 +1333,14 @@ static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_si
              s->is_pcm[i + j * min_pu_width] = 2;
  }
  
-static void hls_transform_tree(HEVCContext *s, int x0, int y0,
-                               int xBase, int yBase, int cb_xBase, int cb_yBase,
-                               int log2_cb_size, int log2_trafo_size,
-                               int trafo_depth, int blk_idx)
+static int hls_transform_tree(HEVCContext *s, int x0, int y0,
+                              int xBase, int yBase, int cb_xBase, int cb_yBase,
+                              int log2_cb_size, int log2_trafo_size,
+                              int trafo_depth, int blk_idx)
  {
      HEVCLocalContext *lc = &s->HEVClc;
      uint8_t split_transform_flag;
+    int ret;
  
      if (trafo_depth > 0 && log2_trafo_size == 2) {
          SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
@@ -1330,20 +1361,20 @@ static void hls_transform_tree(HEVCContext *s, int x0, int y0,
  
      lc->tt.cbf_luma = 1;
  
-    lc->tt.inter_split_flag = s->sps->max_transform_hierarchy_depth_inter == 0 &&
-                              lc->cu.pred_mode == MODE_INTER &&
-                              lc->cu.part_mode != PART_2Nx2N &&
-                              trafo_depth == 0;
-
      if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
          log2_trafo_size >  s->sps->log2_min_tb_size    &&
          trafo_depth     < lc->cu.max_trafo_depth       &&
          !(lc->cu.intra_split_flag && trafo_depth == 0)) {
          split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
      } else {
+        int inter_split = s->sps->max_transform_hierarchy_depth_inter == 0 &&
+                          lc->cu.pred_mode == MODE_INTER &&
+                          lc->cu.part_mode != PART_2Nx2N &&
+                          trafo_depth == 0;
+
          split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
                                 (lc->cu.intra_split_flag && trafo_depth == 0) ||
-                               lc->tt.inter_split_flag;
+                               inter_split;
      }
  
      if (log2_trafo_size > 2) {
@@ -1361,17 +1392,24 @@ static void hls_transform_tree(HEVCContext *s, int x0, int y0,
      }
  
      if (split_transform_flag) {
-        int x1 = x0 + ((1 << log2_trafo_size) >> 1);
-        int y1 = y0 + ((1 << log2_trafo_size) >> 1);
-
-        hls_transform_tree(s, x0, y0, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
-                           log2_trafo_size - 1, trafo_depth + 1, 0);
-        hls_transform_tree(s, x1, y0, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
-                           log2_trafo_size - 1, trafo_depth + 1, 1);
-        hls_transform_tree(s, x0, y1, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
-                           log2_trafo_size - 1, trafo_depth + 1, 2);
-        hls_transform_tree(s, x1, y1, x0, y0, cb_xBase, cb_yBase, log2_cb_size,
-                           log2_trafo_size - 1, trafo_depth + 1, 3);
+        const int trafo_size_split = 1 << (log2_trafo_size - 1);
+        const int x1 = x0 + trafo_size_split;
+        const int y1 = y0 + trafo_size_split;
+
+#define SUBDIVIDE(x, y, idx)                                                    \
+do {                                                                            \
+    ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
+                             log2_trafo_size - 1, trafo_depth + 1, idx);        \
+    if (ret < 0)                                                                \
+        return ret;                                                             \
+} while (0)
+
+        SUBDIVIDE(x0, y0, 0);
+        SUBDIVIDE(x1, y0, 1);
+        SUBDIVIDE(x0, y1, 2);
+        SUBDIVIDE(x1, y1, 3);
+
+#undef SUBDIVIDE
      } else {
          int min_tu_size      = 1 << s->sps->log2_min_tb_size;
          int log2_min_tu_size = s->sps->log2_min_tb_size;
@@ -1383,9 +1421,11 @@ static void hls_transform_tree(HEVCContext *s, int x0, int y0,
              lc->tt.cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
          }
  
-        hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
-                           log2_cb_size, log2_trafo_size, trafo_depth, blk_idx);
-
+        ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
+                                 log2_cb_size, log2_trafo_size, trafo_depth,
+                                 blk_idx);
+        if (ret < 0)
+            return ret;
          // TODO: store cbf_luma somewhere else
          if (lc->tt.cbf_luma) {
              int i, j;
@@ -1405,6 +1445,7 @@ static void hls_transform_tree(HEVCContext *s, int x0, int y0,
                  set_deblocking_bypass(s, x0, y0, log2_trafo_size);
          }
      }
+    return 0;
  }
  
  static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
@@ -1421,7 +1462,7 @@ static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
      uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
  
      int length         = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth_chroma;
-    const uint8_t *pcm = skip_bytes(&s->HEVClc.cc, (length + 7) >> 3);
+    const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
      int ret;
  
      ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
@@ -1497,15 +1538,19 @@ static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
      if (x_off < extra_left || y_off < extra_top ||
          x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
          y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
+        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
          int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
+        int buf_offset = extra_top *
+                         edge_emu_stride + (extra_left << s->sps->pixel_shift);
  
          s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
-                                 srcstride, srcstride,
+                                 edge_emu_stride, srcstride,
                                   block_w + ff_hevc_qpel_extra[mx],
                                   block_h + ff_hevc_qpel_extra[my],
                                   x_off - extra_left, y_off - extra_top,
                                   pic_width, pic_height);
-        src = lc->edge_emu_buffer + offset;
+        src = lc->edge_emu_buffer + buf_offset;
+        srcstride = edge_emu_stride;
      }
      s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
                                       block_h, lc->mc_buffer);
@@ -1548,27 +1593,35 @@ static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
      if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
          x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
          y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
+        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
          int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
+        int buf_offset1 = EPEL_EXTRA_BEFORE *
+                          (edge_emu_stride + (1 << s->sps->pixel_shift));
          int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
+        int buf_offset2 = EPEL_EXTRA_BEFORE *
+                          (edge_emu_stride + (1 << s->sps->pixel_shift));
  
          s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
-                                 src1stride, src1stride,
+                                 edge_emu_stride, src1stride,
                                   block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
                                   x_off - EPEL_EXTRA_BEFORE,
                                   y_off - EPEL_EXTRA_BEFORE,
                                   pic_width, pic_height);
  
-        src1 = lc->edge_emu_buffer + offset1;
+        src1 = lc->edge_emu_buffer + buf_offset1;
+        src1stride = edge_emu_stride;
          s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
                                               block_w, block_h, mx, my, lc->mc_buffer);
  
          s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
-                                 src2stride, src2stride,
+                                 edge_emu_stride, src2stride,
                                   block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
                                   x_off - EPEL_EXTRA_BEFORE,
                                   y_off - EPEL_EXTRA_BEFORE,
                                   pic_width, pic_height);
-        src2 = lc->edge_emu_buffer + offset2;
+        src2 = lc->edge_emu_buffer + buf_offset2;
+        src2stride = edge_emu_stride;
+
          s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
                                               block_w, block_h, mx, my,
                                               lc->mc_buffer);
@@ -2025,7 +2078,7 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
      int min_cb_width     = s->sps->min_cb_width;
      int x_cb             = x0 >> log2_min_cb_size;
      int y_cb             = y0 >> log2_min_cb_size;
-    int x, y;
+    int x, y, ret;
  
      lc->cu.x                = x0;
      lc->cu.y                = y0;
@@ -2082,7 +2135,6 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
                  lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s);
              }
              if (lc->cu.pcm_flag) {
-                int ret;
                  intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
                  ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
                  if (s->sps->pcm.loop_filter_disable_flag)
@@ -2141,8 +2193,11 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
                  lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
                                           s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
                                           s->sps->max_transform_hierarchy_depth_inter;
-                hls_transform_tree(s, x0, y0, x0, y0, x0, y0, log2_cb_size,
-                                   log2_cb_size, 0, 0);
+                ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
+                                         log2_cb_size,
+                                         log2_cb_size, 0, 0);
+                if (ret < 0)
+                    return ret;
              } else {
                  if (!s->sh.disable_deblocking_filter_flag)
                      ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
@@ -2171,16 +2226,15 @@ static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
  {
      HEVCLocalContext *lc = &s->HEVClc;
      const int cb_size    = 1 << log2_cb_size;
+    int split_cu;
  
      lc->ct.depth = cb_depth;
      if (x0 + cb_size <= s->sps->width  &&
          y0 + cb_size <= s->sps->height &&
          log2_cb_size > s->sps->log2_min_cb_size) {
-        SAMPLE(s->split_cu_flag, x0, y0) =
-            ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
+        split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
      } else {
-        SAMPLE(s->split_cu_flag, x0, y0) =
-            (log2_cb_size > s->sps->log2_min_cb_size);
+        split_cu = (log2_cb_size > s->sps->log2_min_cb_size);
      }
      if (s->pps->cu_qp_delta_enabled_flag &&
          log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
@@ -2188,7 +2242,7 @@ static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
          lc->tu.cu_qp_delta          = 0;
      }
  
-    if (SAMPLE(s->split_cu_flag, x0, y0)) {
+    if (split_cu) {
          const int cb_size_split = cb_size >> 1;
          const int x1 = x0 + cb_size_split;
          const int y1 = y0 + cb_size_split;
@@ -2397,6 +2451,20 @@ static int set_side_data(HEVCContext *s)
              stereo->flags = AV_STEREO3D_FLAG_INVERT;
      }
  
+    if (s->sei_display_orientation_present &&
+        (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
+        double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
+        AVFrameSideData *rotation = av_frame_new_side_data(out,
+                                                           AV_FRAME_DATA_DISPLAYMATRIX,
+                                                           sizeof(int32_t) * 9);
+        if (!rotation)
+            return AVERROR(ENOMEM);
+
+        av_display_rotation_set((int32_t *)rotation->data, angle);
+        av_display_matrix_flip((int32_t *)rotation->data,
+                               s->sei_hflip, s->sei_vflip); /* API order: hflip, then vflip */
+    }
+
      return 0;
  }
  
@@ -2412,6 +2480,7 @@ static int hevc_frame_start(HEVCContext *s)
  
      lc->start_of_tiles_x = 0;
      s->is_decoded        = 0;
+    s->first_nal_type    = s->nal_unit_type;
  
      if (s->pps->tiles_enabled_flag)
          lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
@@ -2421,19 +2490,14 @@ static int hevc_frame_start(HEVCContext *s)
      if (ret < 0)
          goto fail;
  
-    av_fast_malloc(&lc->edge_emu_buffer, &lc->edge_emu_buffer_size,
-                   (MAX_PB_SIZE + 7) * s->ref->frame->linesize[0]);
-    if (!lc->edge_emu_buffer) {
-        ret = AVERROR(ENOMEM);
-        goto fail;
-    }
-
      ret = ff_hevc_frame_rps(s);
      if (ret < 0) {
          av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
          goto fail;
      }
  
+    s->ref->frame->key_frame = IS_IRAP(s);
+
      ret = set_side_data(s);
      if (ret < 0)
          goto fail;
@@ -2468,9 +2532,7 @@ static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
      if (ret < 0) {
          av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
                 s->nal_unit_type);
-        if (s->avctx->err_recognition & AV_EF_EXPLODE)
-            return ret;
-        return 0;
+        goto fail;
      } else if (!ret)
          return 0;
  
@@ -2478,23 +2540,23 @@ static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
      case NAL_VPS:
          ret = ff_hevc_decode_nal_vps(s);
          if (ret < 0)
-            return ret;
+            goto fail;
          break;
      case NAL_SPS:
          ret = ff_hevc_decode_nal_sps(s);
          if (ret < 0)
-            return ret;
+            goto fail;
          break;
      case NAL_PPS:
          ret = ff_hevc_decode_nal_pps(s);
          if (ret < 0)
-            return ret;
+            goto fail;
          break;
      case NAL_SEI_PREFIX:
      case NAL_SEI_SUFFIX:
          ret = ff_hevc_decode_nal_sei(s);
          if (ret < 0)
-            return ret;
+            goto fail;
          break;
      case NAL_TRAIL_R:
      case NAL_TRAIL_N:
@@ -2540,6 +2602,13 @@ static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
                  return ret;
          } else if (!s->ref) {
              av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
+            goto fail;
+        }
+
+        if (s->nal_unit_type != s->first_nal_type) {
+            av_log(s->avctx, AV_LOG_ERROR,
+                   "Non-matching NAL types of the VCL NALUs: %d %d\n",
+                   s->first_nal_type, s->nal_unit_type);
              return AVERROR_INVALIDDATA;
          }
  
@@ -2549,8 +2618,7 @@ static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
              if (ret < 0) {
                  av_log(s->avctx, AV_LOG_WARNING,
                         "Error constructing the reference lists for the current slice.\n");
-                if (s->avctx->err_recognition & AV_EF_EXPLODE)
-                    return ret;
+                goto fail;
              }
          }
  
@@ -2563,8 +2631,10 @@ static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
                  restore_tqb_pixels(s);
          }
  
-        if (ctb_addr_ts < 0)
-            return ctb_addr_ts;
+        if (ctb_addr_ts < 0) {
+            ret = ctb_addr_ts;
+            goto fail;
+        }
          break;
      case NAL_EOS_NUT:
      case NAL_EOB_NUT:
@@ -2580,6 +2650,10 @@ static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
      }
  
      return 0;
+fail:
+    if (s->avctx->err_recognition & AV_EF_EXPLODE)
+        return ret;
+    return 0;
  }
  
  /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
@@ -2760,8 +2834,7 @@ static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
          if (ret < 0) {
              av_log(s->avctx, AV_LOG_WARNING,
                     "Error parsing NAL unit #%d.\n", i);
-            if (s->avctx->err_recognition & AV_EF_EXPLODE)
-                goto fail;
+            goto fail;
          }
      }
  
@@ -2817,8 +2890,8 @@ static int verify_md5(HEVCContext *s, AVFrame *frame)
              const uint8_t *src = frame->data[i] + j * frame->linesize[i];
  #if HAVE_BIGENDIAN
              if (pixel_shift) {
-                s->dsp.bswap16_buf((uint16_t*)s->checksum_buf,
-                                   (const uint16_t*)src, w);
+                s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
+                                    (const uint16_t *) src, w);
                  src = s->checksum_buf;
              }
  #endif
@@ -2924,12 +2997,10 @@ fail:
  static av_cold int hevc_decode_free(AVCodecContext *avctx)
  {
      HEVCContext       *s = avctx->priv_data;
-    HEVCLocalContext *lc = &s->HEVClc;
      int i;
  
      pic_arrays_free(s);
  
-    av_freep(&lc->edge_emu_buffer);
      av_freep(&s->md5_ctx);
  
      av_frame_free(&s->tmp_frame);
@@ -2983,7 +3054,7 @@ static av_cold int hevc_init_context(AVCodecContext *avctx)
      if (!s->md5_ctx)
          goto fail;
  
-    ff_dsputil_init(&s->dsp, avctx);
+    ff_bswapdsp_init(&s->bdsp);
  
      s->context_initialized = 1;