]> git.sesse.net Git - ffmpeg/blobdiff - libavcodec/hevc.c
h2645_parse: add support for parsing h264
[ffmpeg] / libavcodec / hevc.c
index 88198fca8cc6a3bf7413e52f012530f67ce7fde2..3bc730a68e9abb3900d02fce2778199c3c94509f 100644 (file)
 #include "cabac_functions.h"
 #include "golomb.h"
 #include "hevc.h"
+#include "profiles.h"
 
-const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 2 };
-const uint8_t ff_hevc_qpel_extra_after[4]  = { 0, 3, 4, 4 };
-const uint8_t ff_hevc_qpel_extra[4]        = { 0, 6, 7, 6 };
+const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 3 };
+const uint8_t ff_hevc_qpel_extra_after[4]  = { 0, 4, 4, 4 };
+const uint8_t ff_hevc_qpel_extra[4]        = { 0, 7, 7, 7 };
 
 static const uint8_t scan_1x1[1] = { 0 };
 
@@ -82,20 +83,6 @@ static const uint8_t diag_scan2x2_inv[2][2] = {
     { 1, 3, },
 };
 
-const uint8_t ff_hevc_diag_scan4x4_x[16] = {
-    0, 0, 1, 0,
-    1, 2, 0, 1,
-    2, 3, 1, 2,
-    3, 2, 3, 3,
-};
-
-const uint8_t ff_hevc_diag_scan4x4_y[16] = {
-    0, 1, 0, 2,
-    1, 0, 3, 2,
-    1, 0, 3, 2,
-    1, 3, 2, 3,
-};
-
 static const uint8_t diag_scan4x4_inv[4][4] = {
     { 0,  2,  5,  9, },
     { 1,  4,  8, 12, },
@@ -103,44 +90,6 @@ static const uint8_t diag_scan4x4_inv[4][4] = {
     { 6, 10, 13, 15, },
 };
 
-const uint8_t ff_hevc_diag_scan8x8_x[64] = {
-    0, 0, 1, 0,
-    1, 2, 0, 1,
-    2, 3, 0, 1,
-    2, 3, 4, 0,
-    1, 2, 3, 4,
-    5, 0, 1, 2,
-    3, 4, 5, 6,
-    0, 1, 2, 3,
-    4, 5, 6, 7,
-    1, 2, 3, 4,
-    5, 6, 7, 2,
-    3, 4, 5, 6,
-    7, 3, 4, 5,
-    6, 7, 4, 5,
-    6, 7, 5, 6,
-    7, 6, 7, 7,
-};
-
-const uint8_t ff_hevc_diag_scan8x8_y[64] = {
-    0, 1, 0, 2,
-    1, 0, 3, 2,
-    1, 0, 4, 3,
-    2, 1, 0, 5,
-    4, 3, 2, 1,
-    0, 6, 5, 4,
-    3, 2, 1, 0,
-    7, 6, 5, 4,
-    3, 2, 1, 0,
-    7, 6, 5, 4,
-    3, 2, 1, 7,
-    6, 5, 4, 3,
-    2, 7, 6, 5,
-    4, 3, 7, 6,
-    5, 4, 7, 6,
-    5, 7, 6, 7,
-};
-
 static const uint8_t diag_scan8x8_inv[8][8] = {
     {  0,  2,  5,  9, 14, 20, 27, 35, },
     {  1,  4,  8, 13, 19, 26, 34, 42, },
@@ -252,7 +201,7 @@ static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
     uint8_t chroma_weight_l1_flag[16];
 
     s->sh.luma_log2_weight_denom = av_clip(get_ue_golomb_long(gb), 0, 7);
-    if (s->sps->chroma_format_idc != 0) {
+    if (s->ps.sps->chroma_format_idc != 0) {
         int delta = get_se_golomb(gb);
         s->sh.chroma_log2_weight_denom = av_clip(s->sh.luma_log2_weight_denom + delta, 0, 7);
     }
@@ -264,7 +213,7 @@ static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
             s->sh.luma_offset_l0[i] = 0;
         }
     }
-    if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
+    if (s->ps.sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
         for (i = 0; i < s->sh.nb_refs[L0]; i++)
             chroma_weight_l0_flag[i] = get_bits1(gb);
     } else {
@@ -300,7 +249,7 @@ static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
                 s->sh.luma_offset_l1[i] = 0;
             }
         }
-        if (s->sps->chroma_format_idc != 0) {
+        if (s->ps.sps->chroma_format_idc != 0) {
             for (i = 0; i < s->sh.nb_refs[L1]; i++)
                 chroma_weight_l1_flag[i] = get_bits1(gb);
         } else {
@@ -333,7 +282,7 @@ static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 
 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 {
-    const HEVCSPS *sps = s->sps;
+    const HEVCSPS *sps = s->ps.sps;
     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
     int prev_delta_msb = 0;
     unsigned int nb_sps = 0, nb_sh;
@@ -383,10 +332,10 @@ static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
     return 0;
 }
 
-static void export_stream_params(AVCodecContext *avctx,
-                                 const HEVCContext *s, const HEVCSPS *sps)
+static void export_stream_params(AVCodecContext *avctx, const HEVCParamSets *ps,
+                                 const HEVCSPS *sps)
 {
-    const HEVCVPS *vps = (const HEVCVPS*)s->vps_list[sps->vps_id]->data;
+    const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
     unsigned int num = 0, den = 0;
 
     avctx->pix_fmt             = sps->pix_fmt;
@@ -431,23 +380,32 @@ static void export_stream_params(AVCodecContext *avctx,
 
 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
 {
-    #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL)
+    #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL + CONFIG_HEVC_VDPAU_HWACCEL)
     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
     int ret;
 
-    export_stream_params(s->avctx, s, sps);
-
     pic_arrays_free(s);
+    s->ps.sps = NULL;
+    s->ps.vps = NULL;
+
+    if (!sps)
+        return 0;
+
     ret = pic_arrays_init(s, sps);
     if (ret < 0)
         goto fail;
 
+    export_stream_params(s->avctx, &s->ps, sps);
+
     if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
 #if CONFIG_HEVC_DXVA2_HWACCEL
         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
 #endif
 #if CONFIG_HEVC_D3D11VA_HWACCEL
         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
+#endif
+#if CONFIG_HEVC_VDPAU_HWACCEL
+        *fmt++ = AV_PIX_FMT_VDPAU;
 #endif
     }
 
@@ -471,14 +429,14 @@ static int set_sps(HEVCContext *s, const HEVCSPS *sps)
         s->frame = s->tmp_frame;
     }
 
-    s->sps = sps;
-    s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
+    s->ps.sps = sps;
+    s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
 
     return 0;
 
 fail:
     pic_arrays_free(s);
-    s->sps = NULL;
+    s->ps.sps = NULL;
     return ret;
 }
 
@@ -500,22 +458,22 @@ static int hls_slice_header(HEVCContext *s)
         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 
     sh->pps_id = get_ue_golomb_long(gb);
-    if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
+    if (sh->pps_id >= MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
         return AVERROR_INVALIDDATA;
     }
     if (!sh->first_slice_in_pic_flag &&
-        s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
+        s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
         return AVERROR_INVALIDDATA;
     }
-    s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
+    s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
 
-    if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
-        s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
+    if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
+        s->ps.sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
 
         ff_hevc_clear_refs(s);
-        ret = set_sps(s, s->sps);
+        ret = set_sps(s, s->ps.sps);
         if (ret < 0)
             return ret;
 
@@ -527,13 +485,13 @@ static int hls_slice_header(HEVCContext *s)
     if (!sh->first_slice_in_pic_flag) {
         int slice_address_length;
 
-        if (s->pps->dependent_slice_segments_enabled_flag)
+        if (s->ps.pps->dependent_slice_segments_enabled_flag)
             sh->dependent_slice_segment_flag = get_bits1(gb);
 
-        slice_address_length = av_ceil_log2(s->sps->ctb_width *
-                                            s->sps->ctb_height);
+        slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
+                                            s->ps.sps->ctb_height);
         sh->slice_segment_addr = slice_address_length ? get_bits(gb, slice_address_length) : 0;
-        if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
+        if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
             av_log(s->avctx, AV_LOG_ERROR,
                    "Invalid slice segment address: %u.\n",
                    sh->slice_segment_addr);
@@ -553,7 +511,7 @@ static int hls_slice_header(HEVCContext *s)
     if (!sh->dependent_slice_segment_flag) {
         s->slice_initialized = 0;
 
-        for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
+        for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 
         sh->slice_type = get_ue_golomb_long(gb);
@@ -571,16 +529,16 @@ static int hls_slice_header(HEVCContext *s)
 
         // when flag is not present, picture is inferred to be output
         sh->pic_output_flag = 1;
-        if (s->pps->output_flag_present_flag)
+        if (s->ps.pps->output_flag_present_flag)
             sh->pic_output_flag = get_bits1(gb);
 
-        if (s->sps->separate_colour_plane_flag)
+        if (s->ps.sps->separate_colour_plane_flag)
             sh->colour_plane_id = get_bits(gb, 2);
 
         if (!IS_IDR(s)) {
-            int poc;
+            int poc, pos;
 
-            sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
+            sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
                 av_log(s->avctx, AV_LOG_WARNING,
@@ -592,35 +550,37 @@ static int hls_slice_header(HEVCContext *s)
             s->poc = poc;
 
             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
+            pos = get_bits_left(gb);
             if (!sh->short_term_ref_pic_set_sps_flag) {
-                int pos = get_bits_left(gb);
-                ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->sps, 1);
+                ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
                 if (ret < 0)
                     return ret;
 
-                sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
                 sh->short_term_rps = &sh->slice_rps;
             } else {
                 int numbits, rps_idx;
 
-                if (!s->sps->nb_st_rps) {
+                if (!s->ps.sps->nb_st_rps) {
                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
                     return AVERROR_INVALIDDATA;
                 }
 
-                numbits = av_ceil_log2(s->sps->nb_st_rps);
+                numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
-                sh->short_term_rps = &s->sps->st_rps[rps_idx];
+                sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
             }
+            sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
 
+            pos = get_bits_left(gb);
             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
             if (ret < 0) {
                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
                     return AVERROR_INVALIDDATA;
             }
+            sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
 
-            if (s->sps->sps_temporal_mvp_enabled_flag)
+            if (s->ps.sps->sps_temporal_mvp_enabled_flag)
                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
             else
                 sh->slice_temporal_mvp_enabled_flag = 0;
@@ -640,7 +600,7 @@ static int hls_slice_header(HEVCContext *s)
             s->nal_unit_type != NAL_RASL_R)
             s->pocTid0 = s->poc;
 
-        if (s->sps->sao_enabled) {
+        if (s->ps.sps->sao_enabled) {
             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
             sh->slice_sample_adaptive_offset_flag[1] =
             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
@@ -654,9 +614,9 @@ static int hls_slice_header(HEVCContext *s)
         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
             int nb_refs;
 
-            sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
+            sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
             if (sh->slice_type == B_SLICE)
-                sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
+                sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
 
             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
@@ -677,7 +637,7 @@ static int hls_slice_header(HEVCContext *s)
                 return AVERROR_INVALIDDATA;
             }
 
-            if (s->pps->lists_modification_present_flag && nb_refs > 1) {
+            if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
                 sh->rpl_modification_flag[0] = get_bits1(gb);
                 if (sh->rpl_modification_flag[0]) {
                     for (i = 0; i < sh->nb_refs[L0]; i++)
@@ -695,7 +655,7 @@ static int hls_slice_header(HEVCContext *s)
             if (sh->slice_type == B_SLICE)
                 sh->mvd_l1_zero_flag = get_bits1(gb);
 
-            if (s->pps->cabac_init_present_flag)
+            if (s->ps.pps->cabac_init_present_flag)
                 sh->cabac_init_flag = get_bits1(gb);
             else
                 sh->cabac_init_flag = 0;
@@ -717,8 +677,8 @@ static int hls_slice_header(HEVCContext *s)
                 }
             }
 
-            if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
-                (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
+            if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
+                (s->ps.pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
                 pred_weight_table(s, gb);
             }
 
@@ -733,7 +693,7 @@ static int hls_slice_header(HEVCContext *s)
 
         sh->slice_qp_delta = get_se_golomb(gb);
 
-        if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
+        if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
             sh->slice_cb_qp_offset = get_se_golomb(gb);
             sh->slice_cr_qp_offset = get_se_golomb(gb);
         } else {
@@ -741,10 +701,10 @@ static int hls_slice_header(HEVCContext *s)
             sh->slice_cr_qp_offset = 0;
         }
 
-        if (s->pps->deblocking_filter_control_present_flag) {
+        if (s->ps.pps->deblocking_filter_control_present_flag) {
             int deblocking_filter_override_flag = 0;
 
-            if (s->pps->deblocking_filter_override_enabled_flag)
+            if (s->ps.pps->deblocking_filter_override_enabled_flag)
                 deblocking_filter_override_flag = get_bits1(gb);
 
             if (deblocking_filter_override_flag) {
@@ -754,9 +714,9 @@ static int hls_slice_header(HEVCContext *s)
                     sh->tc_offset   = get_se_golomb(gb) * 2;
                 }
             } else {
-                sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
-                sh->beta_offset                    = s->pps->beta_offset;
-                sh->tc_offset                      = s->pps->tc_offset;
+                sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
+                sh->beta_offset                    = s->ps.pps->beta_offset;
+                sh->tc_offset                      = s->ps.pps->tc_offset;
             }
         } else {
             sh->disable_deblocking_filter_flag = 0;
@@ -764,13 +724,13 @@ static int hls_slice_header(HEVCContext *s)
             sh->tc_offset                      = 0;
         }
 
-        if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
+        if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
             (sh->slice_sample_adaptive_offset_flag[0] ||
              sh->slice_sample_adaptive_offset_flag[1] ||
              !sh->disable_deblocking_filter_flag)) {
             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
         } else {
-            sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
+            sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
         }
     } else if (!s->slice_initialized) {
         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
@@ -778,7 +738,7 @@ static int hls_slice_header(HEVCContext *s)
     }
 
     sh->num_entry_point_offsets = 0;
-    if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
+    if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
         if (sh->num_entry_point_offsets > 0) {
             int offset_len = get_ue_golomb_long(gb) + 1;
@@ -788,21 +748,21 @@ static int hls_slice_header(HEVCContext *s)
         }
     }
 
-    if (s->pps->slice_header_extension_present_flag) {
+    if (s->ps.pps->slice_header_extension_present_flag) {
         unsigned int length = get_ue_golomb_long(gb);
         for (i = 0; i < length; i++)
             skip_bits(gb, 8);  // slice_header_extension_data_byte
     }
 
     // Inferred parameters
-    sh->slice_qp = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
+    sh->slice_qp = 26 + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
     if (sh->slice_qp > 51 ||
-        sh->slice_qp < -s->sps->qp_bd_offset) {
+        sh->slice_qp < -s->ps.sps->qp_bd_offset) {
         av_log(s->avctx, AV_LOG_ERROR,
                "The slice_qp %d is outside the valid range "
                "[%d, 51].\n",
                sh->slice_qp,
-               -s->sps->qp_bd_offset);
+               -s->ps.sps->qp_bd_offset);
         return AVERROR_INVALIDDATA;
     }
 
@@ -815,16 +775,16 @@ static int hls_slice_header(HEVCContext *s)
 
     s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
 
-    if (!s->pps->cu_qp_delta_enabled_flag)
-        s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset,
-                                52 + s->sps->qp_bd_offset) - s->sps->qp_bd_offset;
+    if (!s->ps.pps->cu_qp_delta_enabled_flag)
+        s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->ps.sps->qp_bd_offset,
+                                52 + s->ps.sps->qp_bd_offset) - s->ps.sps->qp_bd_offset;
 
     s->slice_initialized = 1;
 
     return 0;
 }
 
-#define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
+#define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
 
 #define SET_SAO(elem, value)                            \
 do {                                                    \
@@ -843,7 +803,7 @@ static void hls_sao_param(HEVCContext *s, int rx, int ry)
     HEVCLocalContext *lc    = &s->HEVClc;
     int sao_merge_left_flag = 0;
     int sao_merge_up_flag   = 0;
-    int shift               = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
+    int shift               = s->ps.sps->bit_depth - FFMIN(s->ps.sps->bit_depth, 10);
     SAOParams *sao          = &CTB(s->sao, rx, ry);
     int c_idx, i;
 
@@ -933,10 +893,10 @@ static void hls_residual_coding(HEVCContext *s, int x0, int y0,
     const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
 
     ptrdiff_t stride = s->frame->linesize[c_idx];
-    int hshift       = s->sps->hshift[c_idx];
-    int vshift       = s->sps->vshift[c_idx];
+    int hshift       = s->ps.sps->hshift[c_idx];
+    int vshift       = s->ps.sps->vshift[c_idx];
     uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
-                                              ((x0 >> hshift) << s->sps->pixel_shift)];
+                                              ((x0 >> hshift) << s->ps.sps->pixel_shift)];
     DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
     DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
 
@@ -966,16 +926,16 @@ static void hls_residual_coding(HEVCContext *s, int x0, int y0,
         int qp_y = lc->qp_y;
 
         if (c_idx == 0) {
-            qp = qp_y + s->sps->qp_bd_offset;
+            qp = qp_y + s->ps.sps->qp_bd_offset;
         } else {
             int qp_i, offset;
 
             if (c_idx == 1)
-                offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
+                offset = s->ps.pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
             else
-                offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
+                offset = s->ps.pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
 
-            qp_i = av_clip(qp_y + offset, -s->sps->qp_bd_offset, 57);
+            qp_i = av_clip(qp_y + offset, -s->ps.sps->qp_bd_offset, 57);
             if (qp_i < 30)
                 qp = qp_i;
             else if (qp_i > 43)
@@ -983,18 +943,18 @@ static void hls_residual_coding(HEVCContext *s, int x0, int y0,
             else
                 qp = qp_c[qp_i - 30];
 
-            qp += s->sps->qp_bd_offset;
+            qp += s->ps.sps->qp_bd_offset;
         }
 
-        shift    = s->sps->bit_depth + log2_trafo_size - 5;
+        shift    = s->ps.sps->bit_depth + log2_trafo_size - 5;
         add      = 1 << (shift - 1);
         scale    = level_scale[rem6[qp]] << (div6[qp]);
         scale_m  = 16; // default when no custom scaling lists.
         dc_scale = 16;
 
-        if (s->sps->scaling_list_enable_flag) {
-            const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
-                                    &s->pps->scaling_list : &s->sps->scaling_list;
+        if (s->ps.sps->scaling_list_enable_flag) {
+            const ScalingList *sl = s->ps.pps->scaling_list_data_present_flag ?
+                                    &s->ps.pps->scaling_list : &s->ps.sps->scaling_list;
             int matrix_id = lc->cu.pred_mode != MODE_INTRA;
 
             if (log2_trafo_size != 5)
@@ -1006,7 +966,7 @@ static void hls_residual_coding(HEVCContext *s, int x0, int y0,
         }
     }
 
-    if (s->pps->transform_skip_enabled_flag &&
+    if (s->ps.pps->transform_skip_enabled_flag &&
         !lc->cu.cu_transquant_bypass_flag   &&
         log2_trafo_size == 2) {
         transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
@@ -1190,7 +1150,7 @@ static void hls_residual_coding(HEVCContext *s, int x0, int y0,
             if (first_greater1_coeff_idx != -1) {
                 coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
             }
-            if (!s->pps->sign_data_hiding_flag || !sign_hidden) {
+            if (!s->ps.pps->sign_data_hiding_flag || !sign_hidden) {
                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
             } else {
                 coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
@@ -1208,7 +1168,7 @@ static void hls_residual_coding(HEVCContext *s, int x0, int y0,
                     if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
                         c_rice_param = FFMIN(c_rice_param + 1, 4);
                 }
-                if (s->pps->sign_data_hiding_flag && sign_hidden) {
+                if (s->ps.pps->sign_data_hiding_flag && sign_hidden) {
                     sum_abs += trans_coeff_level;
                     if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
                         trans_coeff_level = -trans_coeff_level;
@@ -1217,7 +1177,7 @@ static void hls_residual_coding(HEVCContext *s, int x0, int y0,
                     trans_coeff_level = -trans_coeff_level;
                 coeff_sign_flag <<= 1;
                 if (!lc->cu.cu_transquant_bypass_flag) {
-                    if (s->sps->scaling_list_enable_flag) {
+                    if (s->ps.sps->scaling_list_enable_flag) {
                         if (y_c || x_c || log2_trafo_size < 4) {
                             int pos;
                             switch (log2_trafo_size) {
@@ -1271,12 +1231,12 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0,
 
         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
         if (log2_trafo_size > 2) {
-            trafo_size = trafo_size << (s->sps->hshift[1] - 1);
+            trafo_size = trafo_size << (s->ps.sps->hshift[1] - 1);
             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1);
             s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2);
         } else if (blk_idx == 3) {
-            trafo_size = trafo_size << s->sps->hshift[1];
+            trafo_size = trafo_size << s->ps.sps->hshift[1];
             ff_hevc_set_neighbour_available(s, xBase, yBase,
                                             trafo_size, trafo_size);
             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
@@ -1288,21 +1248,21 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0,
         int scan_idx   = SCAN_DIAG;
         int scan_idx_c = SCAN_DIAG;
 
-        if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
+        if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
             if (lc->tu.cu_qp_delta != 0)
                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
             lc->tu.is_cu_qp_delta_coded = 1;
 
-            if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
-                lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
+            if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
+                lc->tu.cu_qp_delta >  (25 + s->ps.sps->qp_bd_offset / 2)) {
                 av_log(s->avctx, AV_LOG_ERROR,
                        "The cu_qp_delta %d is outside the valid range "
                        "[%d, %d].\n",
                        lc->tu.cu_qp_delta,
-                       -(26 + s->sps->qp_bd_offset / 2),
-                        (25 + s->sps->qp_bd_offset / 2));
+                       -(26 + s->ps.sps->qp_bd_offset / 2),
+                        (25 + s->ps.sps->qp_bd_offset / 2));
                 return AVERROR_INVALIDDATA;
             }
 
@@ -1347,11 +1307,11 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0,
 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
 {
     int cb_size          = 1 << log2_cb_size;
-    int log2_min_pu_size = s->sps->log2_min_pu_size;
+    int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
 
-    int min_pu_width     = s->sps->min_pu_width;
-    int x_end = FFMIN(x0 + cb_size, s->sps->width);
-    int y_end = FFMIN(y0 + cb_size, s->sps->height);
+    int min_pu_width     = s->ps.sps->min_pu_width;
+    int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
+    int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
     int i, j;
 
     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
@@ -1376,18 +1336,18 @@ static int hls_transform_tree(HEVCContext *s, int x0, int y0,
         lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
     }
 
-    if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
-        log2_trafo_size >  s->sps->log2_min_tb_size    &&
+    if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
+        log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
         trafo_depth     < lc->cu.max_trafo_depth       &&
         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
     } else {
-        int inter_split = s->sps->max_transform_hierarchy_depth_inter == 0 &&
+        int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
                           lc->cu.pred_mode == MODE_INTER &&
                           lc->cu.part_mode != PART_2Nx2N &&
                           trafo_depth == 0;
 
-        split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
+        split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
                                inter_split;
     }
@@ -1422,9 +1382,9 @@ do {
 
 #undef SUBDIVIDE
     } else {
-        int min_tu_size      = 1 << s->sps->log2_min_tb_size;
-        int log2_min_tu_size = s->sps->log2_min_tb_size;
-        int min_tu_width     = s->sps->min_tb_width;
+        int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
+        int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
+        int min_tu_width     = s->ps.sps->min_tb_width;
         int cbf_luma         = 1;
 
         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
@@ -1448,7 +1408,7 @@ do {
         }
         if (!s->sh.disable_deblocking_filter_flag) {
             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
-            if (s->pps->transquant_bypass_enable_flag &&
+            if (s->ps.pps->transquant_bypass_enable_flag &&
                 lc->cu.cu_transquant_bypass_flag)
                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
         }
@@ -1463,13 +1423,13 @@ static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
     GetBitContext gb;
     int cb_size   = 1 << log2_cb_size;
     int stride0   = s->frame->linesize[0];
-    uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
+    uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
     int   stride1 = s->frame->linesize[1];
-    uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
+    uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
     int   stride2 = s->frame->linesize[2];
-    uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
+    uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
 
-    int length         = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth_chroma;
+    int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->ps.sps->pcm.bit_depth_chroma;
     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
     int ret;
 
@@ -1480,9 +1440,9 @@ static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
     if (ret < 0)
         return ret;
 
-    s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->sps->pcm.bit_depth);
-    s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
-    s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
+    s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->ps.sps->pcm.bit_depth);
+    s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->ps.sps->pcm.bit_depth_chroma);
+    s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->ps.sps->pcm.bit_depth_chroma);
     return 0;
 }
 
@@ -1525,13 +1485,13 @@ static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
  */
 static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
                     AVFrame *ref, const Mv *mv, int x_off, int y_off,
-                    int block_w, int block_h)
+                    int block_w, int block_h, int pred_idx)
 {
     HEVCLocalContext *lc = &s->HEVClc;
     uint8_t *src         = ref->data[0];
     ptrdiff_t srcstride  = ref->linesize[0];
-    int pic_width        = s->sps->width;
-    int pic_height       = s->sps->height;
+    int pic_width        = s->ps.sps->width;
+    int pic_height       = s->ps.sps->height;
 
     int mx         = mv->x & 3;
     int my         = mv->y & 3;
@@ -1540,15 +1500,15 @@ static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
 
     x_off += mv->x >> 2;
     y_off += mv->y >> 2;
-    src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
+    src   += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
 
     if (x_off < extra_left || y_off < extra_top ||
         x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
         y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
-        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
-        int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
+        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
+        int offset = extra_top * srcstride + (extra_left << s->ps.sps->pixel_shift);
         int buf_offset = extra_top *
-                         edge_emu_stride + (extra_left << s->sps->pixel_shift);
+                         edge_emu_stride + (extra_left << s->ps.sps->pixel_shift);
 
         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
                                  edge_emu_stride, srcstride,
@@ -1559,8 +1519,8 @@ static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
         src = lc->edge_emu_buffer + buf_offset;
         srcstride = edge_emu_stride;
     }
-    s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
-                                     block_h, lc->mc_buffer);
+    s->hevcdsp.put_hevc_qpel[!!my][!!mx][pred_idx](dst, dststride, src, srcstride,
+                                                   block_h, mx, my, lc->mc_buffer);
 }
 
 /**
@@ -1579,34 +1539,34 @@ static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
  */
 static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
                       ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
-                      int x_off, int y_off, int block_w, int block_h)
+                      int x_off, int y_off, int block_w, int block_h, int pred_idx)
 {
     HEVCLocalContext *lc = &s->HEVClc;
     uint8_t *src1        = ref->data[1];
     uint8_t *src2        = ref->data[2];
     ptrdiff_t src1stride = ref->linesize[1];
     ptrdiff_t src2stride = ref->linesize[2];
-    int pic_width        = s->sps->width >> 1;
-    int pic_height       = s->sps->height >> 1;
+    int pic_width        = s->ps.sps->width >> 1;
+    int pic_height       = s->ps.sps->height >> 1;
 
     int mx = mv->x & 7;
     int my = mv->y & 7;
 
     x_off += mv->x >> 3;
     y_off += mv->y >> 3;
-    src1  += y_off * src1stride + (x_off << s->sps->pixel_shift);
-    src2  += y_off * src2stride + (x_off << s->sps->pixel_shift);
+    src1  += y_off * src1stride + (x_off * (1 << s->ps.sps->pixel_shift));
+    src2  += y_off * src2stride + (x_off * (1 << s->ps.sps->pixel_shift));
 
     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
-        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
-        int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
+        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
+        int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
         int buf_offset1 = EPEL_EXTRA_BEFORE *
-                          (edge_emu_stride + (1 << s->sps->pixel_shift));
-        int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
+                          (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
+        int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
         int buf_offset2 = EPEL_EXTRA_BEFORE *
-                          (edge_emu_stride + (1 << s->sps->pixel_shift));
+                          (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
 
         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
                                  edge_emu_stride, src1stride,
@@ -1617,8 +1577,8 @@ static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
 
         src1 = lc->edge_emu_buffer + buf_offset1;
         src1stride = edge_emu_stride;
-        s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
-                                             block_w, block_h, mx, my, lc->mc_buffer);
+        s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst1, dststride, src1, src1stride,
+                                                       block_h, mx, my, lc->mc_buffer);
 
         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
                                  edge_emu_stride, src2stride,
@@ -1629,16 +1589,13 @@ static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
         src2 = lc->edge_emu_buffer + buf_offset2;
         src2stride = edge_emu_stride;
 
-        s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
-                                             block_w, block_h, mx, my,
-                                             lc->mc_buffer);
+        s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst2, dststride, src2, src2stride,
+                                                       block_h, mx, my, lc->mc_buffer);
     } else {
-        s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
-                                             block_w, block_h, mx, my,
-                                             lc->mc_buffer);
-        s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
-                                             block_w, block_h, mx, my,
-                                             lc->mc_buffer);
+        s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst1, dststride, src1, src1stride,
+                                                       block_h, mx, my, lc->mc_buffer);
+        s->hevcdsp.put_hevc_epel[!!my][!!mx][pred_idx](dst2, dststride, src2, src2stride,
+                                                       block_h, mx, my, lc->mc_buffer);
     }
 }
 
@@ -1697,26 +1654,31 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
                                 int nPbW, int nPbH,
                                 int log2_cb_size, int partIdx)
 {
+    static const int pred_indices[] = {
+        [4] = 0, [8] = 1, [12] = 2, [16] = 3, [24] = 4, [32] = 5, [48] = 6, [64] = 7,
+    };
+    const int pred_idx = pred_indices[nPbW];
+
 #define POS(c_idx, x, y)                                                              \
-    &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
-                           (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
+    &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
+                           (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
     HEVCLocalContext *lc = &s->HEVClc;
     int merge_idx = 0;
     struct MvField current_mv = {{{ 0 }}};
 
-    int min_pu_width = s->sps->min_pu_width;
+    int min_pu_width = s->ps.sps->min_pu_width;
 
     MvField *tab_mvf = s->ref->tab_mvf;
     RefPicList  *refPicList = s->ref->refPicList;
     HEVCFrame *ref0, *ref1;
 
-    int tmpstride = MAX_PB_SIZE;
+    int tmpstride = MAX_PB_SIZE * sizeof(int16_t);
 
     uint8_t *dst0 = POS(0, x0, y0);
     uint8_t *dst1 = POS(1, x0, y0);
     uint8_t *dst2 = POS(2, x0, y0);
-    int log2_min_cb_size = s->sps->log2_min_cb_size;
-    int min_cb_width     = s->sps->min_cb_width;
+    int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
+    int min_cb_width     = s->ps.sps->min_cb_width;
     int x_cb             = x0 >> log2_min_cb_size;
     int y_cb             = y0 >> log2_min_cb_size;
     int x_pu, y_pu;
@@ -1740,11 +1702,11 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
                               partIdx, merge_idx, &current_mv);
     }
 
-    x_pu = x0 >> s->sps->log2_min_pu_size;
-    y_pu = y0 >> s->sps->log2_min_pu_size;
+    x_pu = x0 >> s->ps.sps->log2_min_pu_size;
+    y_pu = y0 >> s->ps.sps->log2_min_pu_size;
 
-    for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
-        for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
+    for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
+        for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
 
     if (current_mv.pred_flag[0]) {
@@ -1765,71 +1727,71 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
 
         luma_mc(s, tmp, tmpstride, ref0->frame,
-                &current_mv.mv[0], x0, y0, nPbW, nPbH);
-
-        if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
-            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
-            s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
-                                     s->sh.luma_weight_l0[current_mv.ref_idx[0]],
-                                     s->sh.luma_offset_l0[current_mv.ref_idx[0]],
-                                     dst0, s->frame->linesize[0], tmp,
-                                     tmpstride, nPbW, nPbH);
+                &current_mv.mv[0], x0, y0, nPbW, nPbH, pred_idx);
+
+        if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
+            (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
+            s->hevcdsp.weighted_pred[pred_idx](s->sh.luma_log2_weight_denom,
+                                               s->sh.luma_weight_l0[current_mv.ref_idx[0]],
+                                               s->sh.luma_offset_l0[current_mv.ref_idx[0]],
+                                               dst0, s->frame->linesize[0], tmp,
+                                               tmpstride, nPbH);
         } else {
-            s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
+            s->hevcdsp.put_unweighted_pred[pred_idx](dst0, s->frame->linesize[0], tmp, tmpstride, nPbH);
         }
         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
-                  &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
-
-        if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
-            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
-            s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
-                                     s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
-                                     s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
-                                     dst1, s->frame->linesize[1], tmp, tmpstride,
-                                     nPbW / 2, nPbH / 2);
-            s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
-                                     s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
-                                     s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
-                                     dst2, s->frame->linesize[2], tmp2, tmpstride,
-                                     nPbW / 2, nPbH / 2);
+                  &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
+
+        if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
+            (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
+            s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
+                                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
+                                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
+                                                      dst1, s->frame->linesize[1], tmp, tmpstride,
+                                                      nPbH / 2);
+            s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
+                                                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
+                                                      s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
+                                                      dst2, s->frame->linesize[2], tmp2, tmpstride,
+                                                      nPbH / 2);
         } else {
-            s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
-            s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
+            s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst1, s->frame->linesize[1], tmp,  tmpstride, nPbH / 2);
+            s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH / 2);
         }
     } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
         DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
 
         luma_mc(s, tmp, tmpstride, ref1->frame,
-                &current_mv.mv[1], x0, y0, nPbW, nPbH);
-
-        if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
-            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
-            s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
-                                      s->sh.luma_weight_l1[current_mv.ref_idx[1]],
-                                      s->sh.luma_offset_l1[current_mv.ref_idx[1]],
-                                      dst0, s->frame->linesize[0], tmp, tmpstride,
-                                      nPbW, nPbH);
+                &current_mv.mv[1], x0, y0, nPbW, nPbH, pred_idx);
+
+        if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
+            (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
+            s->hevcdsp.weighted_pred[pred_idx](s->sh.luma_log2_weight_denom,
+                                               s->sh.luma_weight_l1[current_mv.ref_idx[1]],
+                                               s->sh.luma_offset_l1[current_mv.ref_idx[1]],
+                                               dst0, s->frame->linesize[0], tmp, tmpstride,
+                                               nPbH);
         } else {
-            s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
+            s->hevcdsp.put_unweighted_pred[pred_idx](dst0, s->frame->linesize[0], tmp, tmpstride, nPbH);
         }
 
         chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
-                  &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
-
-        if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
-            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
-            s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
-                                     s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
-                                     s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
-                                     dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
-            s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
-                                     s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
-                                     s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
-                                     dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
+                  &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
+
+        if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
+            (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
+            s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
+                                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
+                                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
+                                                      dst1, s->frame->linesize[1], tmp, tmpstride, nPbH/2);
+            s->hevcdsp.weighted_pred_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
+                                                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
+                                                      s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
+                                                      dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH/2);
         } else {
-            s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
-            s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
+            s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst1, s->frame->linesize[1], tmp,  tmpstride, nPbH / 2);
+            s->hevcdsp.put_unweighted_pred_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmpstride, nPbH / 2);
         }
     } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
         DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
@@ -1838,48 +1800,48 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
         DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
 
         luma_mc(s, tmp, tmpstride, ref0->frame,
-                &current_mv.mv[0], x0, y0, nPbW, nPbH);
+                &current_mv.mv[0], x0, y0, nPbW, nPbH, pred_idx);
         luma_mc(s, tmp2, tmpstride, ref1->frame,
-                &current_mv.mv[1], x0, y0, nPbW, nPbH);
-
-        if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
-            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
-            s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
-                                         s->sh.luma_weight_l0[current_mv.ref_idx[0]],
-                                         s->sh.luma_weight_l1[current_mv.ref_idx[1]],
-                                         s->sh.luma_offset_l0[current_mv.ref_idx[0]],
-                                         s->sh.luma_offset_l1[current_mv.ref_idx[1]],
-                                         dst0, s->frame->linesize[0],
-                                         tmp, tmp2, tmpstride, nPbW, nPbH);
+                &current_mv.mv[1], x0, y0, nPbW, nPbH, pred_idx);
+
+        if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
+            (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
+            s->hevcdsp.weighted_pred_avg[pred_idx](s->sh.luma_log2_weight_denom,
+                                                   s->sh.luma_weight_l0[current_mv.ref_idx[0]],
+                                                   s->sh.luma_weight_l1[current_mv.ref_idx[1]],
+                                                   s->sh.luma_offset_l0[current_mv.ref_idx[0]],
+                                                   s->sh.luma_offset_l1[current_mv.ref_idx[1]],
+                                                   dst0, s->frame->linesize[0],
+                                                   tmp, tmp2, tmpstride, nPbH);
         } else {
-            s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
-                                             tmp, tmp2, tmpstride, nPbW, nPbH);
+            s->hevcdsp.put_unweighted_pred_avg[pred_idx](dst0, s->frame->linesize[0],
+                                                         tmp, tmp2, tmpstride, nPbH);
         }
 
         chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
-                  &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
+                  &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
         chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
-                  &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
-
-        if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
-            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
-            s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
-                                         s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
-                                         s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
-                                         s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
-                                         s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
-                                         dst1, s->frame->linesize[1], tmp, tmp3,
-                                         tmpstride, nPbW / 2, nPbH / 2);
-            s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
-                                         s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
-                                         s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
-                                         s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
-                                         s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
-                                         dst2, s->frame->linesize[2], tmp2, tmp4,
-                                         tmpstride, nPbW / 2, nPbH / 2);
+                  &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2, pred_idx);
+
+        if ((s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
+            (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag)) {
+            s->hevcdsp.weighted_pred_avg_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
+                                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
+                                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
+                                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
+                                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
+                                                          dst1, s->frame->linesize[1], tmp, tmp3,
+                                                          tmpstride, nPbH / 2);
+            s->hevcdsp.weighted_pred_avg_chroma[pred_idx](s->sh.chroma_log2_weight_denom,
+                                                          s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
+                                                          s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
+                                                          s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
+                                                          s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
+                                                          dst2, s->frame->linesize[2], tmp2, tmp4,
+                                                          tmpstride, nPbH / 2);
         } else {
-            s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
-            s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
+            s->hevcdsp.put_unweighted_pred_avg_chroma[pred_idx](dst1, s->frame->linesize[1], tmp, tmp3,  tmpstride, nPbH/2);
+            s->hevcdsp.put_unweighted_pred_avg_chroma[pred_idx](dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbH/2);
         }
     }
 }
@@ -1891,19 +1853,19 @@ static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
                                 int prev_intra_luma_pred_flag)
 {
     HEVCLocalContext *lc = &s->HEVClc;
-    int x_pu             = x0 >> s->sps->log2_min_pu_size;
-    int y_pu             = y0 >> s->sps->log2_min_pu_size;
-    int min_pu_width     = s->sps->min_pu_width;
-    int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
-    int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
-    int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
+    int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
+    int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
+    int min_pu_width     = s->ps.sps->min_pu_width;
+    int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
+    int x0b              = x0 & ((1 << s->ps.sps->log2_ctb_size) - 1);
+    int y0b              = y0 & ((1 << s->ps.sps->log2_ctb_size) - 1);
 
     int cand_up   = (lc->ctb_up_flag || y0b) ?
                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
     int cand_left = (lc->ctb_left_flag || x0b) ?
                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
 
-    int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
+    int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
 
     MvField *tab_mvf = s->ref->tab_mvf;
     int intra_pred_mode;
@@ -1978,13 +1940,13 @@ static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
                                           int log2_cb_size, int ct_depth)
 {
-    int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
-    int x_cb   = x0 >> s->sps->log2_min_cb_size;
-    int y_cb   = y0 >> s->sps->log2_min_cb_size;
+    int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
+    int x_cb   = x0 >> s->ps.sps->log2_min_cb_size;
+    int y_cb   = y0 >> s->ps.sps->log2_min_cb_size;
     int y;
 
     for (y = 0; y < length; y++)
-        memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
+        memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
                ct_depth, length);
 }
 
@@ -2034,11 +1996,11 @@ static void intra_prediction_unit_default_value(HEVCContext *s,
 {
     HEVCLocalContext *lc = &s->HEVClc;
     int pb_size          = 1 << log2_cb_size;
-    int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
-    int min_pu_width     = s->sps->min_pu_width;
+    int size_in_pus      = pb_size >> s->ps.sps->log2_min_pu_size;
+    int min_pu_width     = s->ps.sps->min_pu_width;
     MvField *tab_mvf     = s->ref->tab_mvf;
-    int x_pu             = x0 >> s->sps->log2_min_pu_size;
-    int y_pu             = y0 >> s->sps->log2_min_pu_size;
+    int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
+    int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
     int j, k;
 
     if (size_in_pus == 0)
@@ -2054,9 +2016,9 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
 {
     int cb_size          = 1 << log2_cb_size;
     HEVCLocalContext *lc = &s->HEVClc;
-    int log2_min_cb_size = s->sps->log2_min_cb_size;
+    int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
     int length           = cb_size >> log2_min_cb_size;
-    int min_cb_width     = s->sps->min_cb_width;
+    int min_cb_width     = s->ps.sps->min_cb_width;
     int x_cb             = x0 >> log2_min_cb_size;
     int y_cb             = y0 >> log2_min_cb_size;
     int x, y, ret;
@@ -2070,7 +2032,7 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
     for (x = 0; x < 4; x++)
         lc->pu.intra_pred_mode[x] = 1;
-    if (s->pps->transquant_bypass_enable_flag) {
+    if (s->ps.pps->transquant_bypass_enable_flag) {
         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
         if (lc->cu.cu_transquant_bypass_flag)
             set_deblocking_bypass(s, x0, y0, log2_cb_size);
@@ -2100,22 +2062,22 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
         if (s->sh.slice_type != I_SLICE)
             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
         if (lc->cu.pred_mode != MODE_INTRA ||
-            log2_cb_size == s->sps->log2_min_cb_size) {
+            log2_cb_size == s->ps.sps->log2_min_cb_size) {
             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
                                       lc->cu.pred_mode == MODE_INTRA;
         }
 
         if (lc->cu.pred_mode == MODE_INTRA) {
-            if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
-                log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
-                log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
+            if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
+                log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
+                log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
                 pcm_flag = ff_hevc_pcm_flag_decode(s);
             }
             if (pcm_flag) {
                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
-                if (s->sps->pcm.loop_filter_disable_flag)
+                if (s->ps.sps->pcm.loop_filter_disable_flag)
                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
 
                 if (ret < 0)
@@ -2171,8 +2133,8 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
             }
             if (rqt_root_cbf) {
                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
-                                         s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
-                                         s->sps->max_transform_hierarchy_depth_inter;
+                                         s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
+                                         s->ps.sps->max_transform_hierarchy_depth_inter;
                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
                                          log2_cb_size,
                                          log2_cb_size, 0, 0, 0, 0);
@@ -2185,7 +2147,7 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
         }
     }
 
-    if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
+    if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
 
     x = y_cb * min_cb_width + x_cb;
@@ -2207,15 +2169,15 @@ static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
     int split_cu;
 
     lc->ct.depth = cb_depth;
-    if (x0 + cb_size <= s->sps->width  &&
-        y0 + cb_size <= s->sps->height &&
-        log2_cb_size > s->sps->log2_min_cb_size) {
+    if (x0 + cb_size <= s->ps.sps->width  &&
+        y0 + cb_size <= s->ps.sps->height &&
+        log2_cb_size > s->ps.sps->log2_min_cb_size) {
         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
     } else {
-        split_cu = (log2_cb_size > s->sps->log2_min_cb_size);
+        split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
     }
-    if (s->pps->cu_qp_delta_enabled_flag &&
-        log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
+    if (s->ps.pps->cu_qp_delta_enabled_flag &&
+        log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
         lc->tu.is_cu_qp_delta_coded = 0;
         lc->tu.cu_qp_delta          = 0;
     }
@@ -2230,7 +2192,7 @@ static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
 
 #define SUBDIVIDE(x, y)                                                \
 do {                                                                   \
-    if (x < s->sps->width && y < s->sps->height) {                     \
+    if (x < s->ps.sps->width && y < s->ps.sps->height) {                     \
         int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
         if (ret < 0)                                                   \
             return ret;                                                \
@@ -2254,77 +2216,77 @@ static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
                                  int ctb_addr_ts)
 {
     HEVCLocalContext *lc  = &s->HEVClc;
-    int ctb_size          = 1 << s->sps->log2_ctb_size;
-    int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
+    int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
+    int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
 
     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
 
-    if (s->pps->entropy_coding_sync_enabled_flag) {
+    if (s->ps.pps->entropy_coding_sync_enabled_flag) {
         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
             lc->first_qp_group = 1;
-        lc->end_of_tiles_x = s->sps->width;
-    } else if (s->pps->tiles_enabled_flag) {
-        if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
-            int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
+        lc->end_of_tiles_x = s->ps.sps->width;
+    } else if (s->ps.pps->tiles_enabled_flag) {
+        if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
+            int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
             lc->start_of_tiles_x = x_ctb;
-            lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
+            lc->end_of_tiles_x   = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
             lc->first_qp_group   = 1;
         }
     } else {
-        lc->end_of_tiles_x = s->sps->width;
+        lc->end_of_tiles_x = s->ps.sps->width;
     }
 
-    lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
+    lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
 
     lc->boundary_flags = 0;
-    if (s->pps->tiles_enabled_flag) {
-        if (x_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
+    if (s->ps.pps->tiles_enabled_flag) {
+        if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
-        if (y_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]])
+        if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
-        if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width])
+        if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
     } else {
-        if (!ctb_addr_in_slice > 0)
+        if (!ctb_addr_in_slice)
             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
-        if (ctb_addr_in_slice < s->sps->ctb_width)
+        if (ctb_addr_in_slice < s->ps.sps->ctb_width)
             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
     }
 
     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
-    lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
-    lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
-    lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
+    lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
+    lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
+    lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
 }
 
 static int hls_slice_data(HEVCContext *s)
 {
-    int ctb_size    = 1 << s->sps->log2_ctb_size;
+    int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
     int more_data   = 1;
     int x_ctb       = 0;
     int y_ctb       = 0;
-    int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
+    int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
     int ret;
 
-    while (more_data && ctb_addr_ts < s->sps->ctb_size) {
-        int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
+    while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
+        int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
 
-        x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
-        y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
+        x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
+        y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
 
         ff_hevc_cabac_init(s, ctb_addr_ts);
 
-        hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
+        hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
 
         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
 
-        ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
+        ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
         if (ret < 0)
             return ret;
         more_data = !ff_hevc_end_of_slice_flag_decode(s);
@@ -2334,55 +2296,29 @@ static int hls_slice_data(HEVCContext *s)
         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
     }
 
-    if (x_ctb + ctb_size >= s->sps->width &&
-        y_ctb + ctb_size >= s->sps->height)
+    if (x_ctb + ctb_size >= s->ps.sps->width &&
+        y_ctb + ctb_size >= s->ps.sps->height)
         ff_hevc_hls_filter(s, x_ctb, y_ctb);
 
     return ctb_addr_ts;
 }
 
-/**
- * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
- * 0 if the unit should be skipped, 1 otherwise
- */
-static int hls_nal_unit(HEVCContext *s)
-{
-    GetBitContext *gb = &s->HEVClc.gb;
-    int nuh_layer_id;
-
-    if (get_bits1(gb) != 0)
-        return AVERROR_INVALIDDATA;
-
-    s->nal_unit_type = get_bits(gb, 6);
-
-    nuh_layer_id   = get_bits(gb, 6);
-    s->temporal_id = get_bits(gb, 3) - 1;
-    if (s->temporal_id < 0)
-        return AVERROR_INVALIDDATA;
-
-    av_log(s->avctx, AV_LOG_DEBUG,
-           "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
-           s->nal_unit_type, nuh_layer_id, s->temporal_id);
-
-    return nuh_layer_id == 0;
-}
-
 static void restore_tqb_pixels(HEVCContext *s)
 {
-    int min_pu_size = 1 << s->sps->log2_min_pu_size;
+    int min_pu_size = 1 << s->ps.sps->log2_min_pu_size;
     int x, y, c_idx;
 
     for (c_idx = 0; c_idx < 3; c_idx++) {
         ptrdiff_t stride = s->frame->linesize[c_idx];
-        int hshift       = s->sps->hshift[c_idx];
-        int vshift       = s->sps->vshift[c_idx];
-        for (y = 0; y < s->sps->min_pu_height; y++) {
-            for (x = 0; x < s->sps->min_pu_width; x++) {
-                if (s->is_pcm[y * s->sps->min_pu_width + x]) {
+        int hshift       = s->ps.sps->hshift[c_idx];
+        int vshift       = s->ps.sps->vshift[c_idx];
+        for (y = 0; y < s->ps.sps->min_pu_height; y++) {
+            for (x = 0; x < s->ps.sps->min_pu_width; x++) {
+                if (s->is_pcm[y * s->ps.sps->min_pu_width + x]) {
                     int n;
                     int len      = min_pu_size >> hshift;
-                    uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
-                    uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
+                    uint8_t *src = &s->frame->data[c_idx][((y << s->ps.sps->log2_min_pu_size) >> vshift) * stride + (((x << s->ps.sps->log2_min_pu_size) >> hshift) << s->ps.sps->pixel_shift)];
+                    uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->ps.sps->log2_min_pu_size) >> vshift) * stride + (((x << s->ps.sps->log2_min_pu_size) >> hshift) << s->ps.sps->pixel_shift)];
                     for (n = 0; n < (min_pu_size >> vshift); n++) {
                         memcpy(dst, src, len);
                         src += stride;
@@ -2450,17 +2386,17 @@ static int hevc_frame_start(HEVCContext *s)
 
     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
-    memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
-    memset(s->is_pcm,        0, s->sps->min_pu_width * s->sps->min_pu_height);
+    memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
+    memset(s->is_pcm,        0, s->ps.sps->min_pu_width * s->ps.sps->min_pu_height);
 
     lc->start_of_tiles_x = 0;
     s->is_decoded        = 0;
     s->first_nal_type    = s->nal_unit_type;
 
-    if (s->pps->tiles_enabled_flag)
-        lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
+    if (s->ps.pps->tiles_enabled_flag)
+        lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
 
-    ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
+    ret = ff_hevc_set_new_ref(s, s->ps.sps->sao_enabled ? &s->sao_frame : &s->frame,
                               s->poc);
     if (ret < 0)
         goto fail;
@@ -2493,37 +2429,30 @@ fail:
     return ret;
 }
 
-static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
+static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
 {
     HEVCLocalContext *lc = &s->HEVClc;
     GetBitContext *gb    = &lc->gb;
     int ctb_addr_ts, ret;
 
-    ret = init_get_bits8(gb, nal->data, nal->size);
-    if (ret < 0)
-        return ret;
-
-    ret = hls_nal_unit(s);
-    if (ret < 0) {
-        av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
-               s->nal_unit_type);
-        goto fail;
-    } else if (!ret)
-        return 0;
+    *gb              = nal->gb;
+    s->nal_unit_type = nal->type;
+    s->temporal_id   = nal->temporal_id;
 
     switch (s->nal_unit_type) {
     case NAL_VPS:
-        ret = ff_hevc_decode_nal_vps(s);
+        ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
         if (ret < 0)
             goto fail;
         break;
     case NAL_SPS:
-        ret = ff_hevc_decode_nal_sps(s);
+        ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
+                                     s->apply_defdispwin);
         if (ret < 0)
             goto fail;
         break;
     case NAL_PPS:
-        ret = ff_hevc_decode_nal_pps(s);
+        ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
         if (ret < 0)
             goto fail;
         break;
@@ -2609,11 +2538,11 @@ static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
                 goto fail;
         } else {
             ctb_addr_ts = hls_slice_data(s);
-            if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
+            if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
                 s->is_decoded = 1;
-                if ((s->pps->transquant_bypass_enable_flag ||
-                     (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
-                    s->sps->sao_enabled)
+                if ((s->ps.pps->transquant_bypass_enable_flag ||
+                     (s->ps.sps->pcm.loop_filter_disable_flag && s->ps.sps->pcm_enabled_flag)) &&
+                    s->ps.sps->sao_enabled)
                     restore_tqb_pixels(s);
             }
 
@@ -2645,82 +2574,30 @@ fail:
 
 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
 {
-    int i, consumed, ret = 0;
+    int i, ret = 0;
 
     s->ref = NULL;
     s->eos = 0;
 
     /* split the input packet into NAL units, so we know the upper bound on the
      * number of slices in the frame */
-    s->nb_nals = 0;
-    while (length >= 4) {
-        HEVCNAL *nal;
-        int extract_length = 0;
-
-        if (s->is_nalff) {
-            int i;
-            for (i = 0; i < s->nal_length_size; i++)
-                extract_length = (extract_length << 8) | buf[i];
-            buf    += s->nal_length_size;
-            length -= s->nal_length_size;
-
-            if (extract_length > length) {
-                av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
-                ret = AVERROR_INVALIDDATA;
-                goto fail;
-            }
-        } else {
-            if (buf[2] == 0) {
-                length--;
-                buf++;
-                continue;
-            }
-            if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
-                ret = AVERROR_INVALIDDATA;
-                goto fail;
-            }
-
-            buf           += 3;
-            length        -= 3;
-            extract_length = length;
-        }
-
-        if (s->nals_allocated < s->nb_nals + 1) {
-            int new_size = s->nals_allocated + 1;
-            HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
-            if (!tmp) {
-                ret = AVERROR(ENOMEM);
-                goto fail;
-            }
-            s->nals = tmp;
-            memset(s->nals + s->nals_allocated, 0,
-                   (new_size - s->nals_allocated) * sizeof(*tmp));
-            s->nals_allocated = new_size;
-        }
-        nal = &s->nals[s->nb_nals++];
-
-        consumed = ff_hevc_extract_rbsp(buf, extract_length, nal);
-        if (consumed < 0) {
-            ret = consumed;
-            goto fail;
-        }
-
-        ret = init_get_bits8(&s->HEVClc.gb, nal->data, nal->size);
-        if (ret < 0)
-            goto fail;
-        hls_nal_unit(s);
+    ret = ff_h2645_packet_split(&s->pkt, buf, length, s->avctx, s->is_nalff,
+                                s->nal_length_size, s->avctx->codec_id);
+    if (ret < 0) {
+        av_log(s->avctx, AV_LOG_ERROR,
+               "Error splitting the input into NAL units.\n");
+        return ret;
+    }
 
-        if (s->nal_unit_type == NAL_EOB_NUT ||
-            s->nal_unit_type == NAL_EOS_NUT)
+    for (i = 0; i < s->pkt.nb_nals; i++) {
+        if (s->pkt.nals[i].type == NAL_EOB_NUT ||
+            s->pkt.nals[i].type == NAL_EOS_NUT)
             s->eos = 1;
-
-        buf    += consumed;
-        length -= consumed;
     }
 
-    /* parse the NAL units */
-    for (i = 0; i < s->nb_nals; i++) {
-        ret = decode_nal_unit(s, &s->nals[i]);
+    /* decode the NAL units */
+    for (i = 0; i < s->pkt.nb_nals; i++) {
+        ret = decode_nal_unit(s, &s->pkt.nals[i]);
         if (ret < 0) {
             av_log(s->avctx, AV_LOG_WARNING,
                    "Error parsing NAL unit #%d.\n", i);
@@ -2751,7 +2628,7 @@ static int verify_md5(HEVCContext *s, AVFrame *frame)
     if (!desc)
         return AVERROR(EINVAL);
 
-    pixel_shift = desc->comp[0].depth_minus1 > 7;
+    pixel_shift = desc->comp[0].depth > 8;
 
     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
            s->poc);
@@ -2914,17 +2791,14 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx)
         av_frame_free(&s->DPB[i].frame);
     }
 
-    for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
-        av_buffer_unref(&s->vps_list[i]);
-    for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
-        av_buffer_unref(&s->sps_list[i]);
-    for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
-        av_buffer_unref(&s->pps_list[i]);
+    for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++)
+        av_buffer_unref(&s->ps.vps_list[i]);
+    for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++)
+        av_buffer_unref(&s->ps.sps_list[i]);
+    for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++)
+        av_buffer_unref(&s->ps.pps_list[i]);
 
-    for (i = 0; i < s->nals_allocated; i++)
-        av_freep(&s->nals[i].rbsp_buffer);
-    av_freep(&s->nals);
-    s->nals_allocated = 0;
+    ff_h2645_packet_uninit(&s->pkt);
 
     return 0;
 }
@@ -2990,35 +2864,35 @@ static int hevc_update_thread_context(AVCodecContext *dst,
         }
     }
 
-    for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
-        av_buffer_unref(&s->vps_list[i]);
-        if (s0->vps_list[i]) {
-            s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
-            if (!s->vps_list[i])
+    for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
+        av_buffer_unref(&s->ps.vps_list[i]);
+        if (s0->ps.vps_list[i]) {
+            s->ps.vps_list[i] = av_buffer_ref(s0->ps.vps_list[i]);
+            if (!s->ps.vps_list[i])
                 return AVERROR(ENOMEM);
         }
     }
 
-    for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
-        av_buffer_unref(&s->sps_list[i]);
-        if (s0->sps_list[i]) {
-            s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
-            if (!s->sps_list[i])
+    for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
+        av_buffer_unref(&s->ps.sps_list[i]);
+        if (s0->ps.sps_list[i]) {
+            s->ps.sps_list[i] = av_buffer_ref(s0->ps.sps_list[i]);
+            if (!s->ps.sps_list[i])
                 return AVERROR(ENOMEM);
         }
     }
 
-    for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
-        av_buffer_unref(&s->pps_list[i]);
-        if (s0->pps_list[i]) {
-            s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
-            if (!s->pps_list[i])
+    for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
+        av_buffer_unref(&s->ps.pps_list[i]);
+        if (s0->ps.pps_list[i]) {
+            s->ps.pps_list[i] = av_buffer_ref(s0->ps.pps_list[i]);
+            if (!s->ps.pps_list[i])
                 return AVERROR(ENOMEM);
         }
     }
 
-    if (s->sps != s0->sps)
-        ret = set_sps(s, s0->sps);
+    if (s->ps.sps != s0->ps.sps)
+        ret = set_sps(s, s0->ps.sps);
 
     s->seq_decode = s0->seq_decode;
     s->seq_output = s0->seq_output;
@@ -3099,10 +2973,10 @@ static int hevc_decode_extradata(HEVCContext *s)
     }
 
     /* export stream parameters from the first SPS */
-    for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
-        if (s->sps_list[i]) {
-            const HEVCSPS *sps = (const HEVCSPS*)s->sps_list[i]->data;
-            export_stream_params(s->avctx, s, sps);
+    for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
+        if (s->ps.sps_list[i]) {
+            const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
+            export_stream_params(s->avctx, &s->ps, sps);
             break;
         }
     }
@@ -3115,8 +2989,6 @@ static av_cold int hevc_decode_init(AVCodecContext *avctx)
     HEVCContext *s = avctx->priv_data;
     int ret;
 
-    ff_init_cabac_states();
-
     avctx->internal->allocate_progress = 1;
 
     ret = hevc_init_context(avctx);
@@ -3158,13 +3030,6 @@ static void hevc_decode_flush(AVCodecContext *avctx)
 #define OFFSET(x) offsetof(HEVCContext, x)
 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
 
-static const AVProfile profiles[] = {
-    { FF_PROFILE_HEVC_MAIN,                 "Main"                },
-    { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
-    { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
-    { FF_PROFILE_UNKNOWN },
-};
-
 static const AVOption options[] = {
     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
@@ -3191,7 +3056,7 @@ AVCodec ff_hevc_decoder = {
     .flush                 = hevc_decode_flush,
     .update_thread_context = hevc_update_thread_context,
     .init_thread_copy      = hevc_init_thread_copy,
-    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
-                             CODEC_CAP_FRAME_THREADS,
-    .profiles              = NULL_IF_CONFIG_SMALL(profiles),
+    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
+                             AV_CODEC_CAP_FRAME_THREADS,
+    .profiles              = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
 };