git.sesse.net Git - ffmpeg/blob - libavcodec/hevcdec.c

   1 /*
   2  * HEVC video Decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of FFmpeg.
  10  *
  11  * FFmpeg is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * FFmpeg is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with FFmpeg; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/attributes.h"
  27 #include "libavutil/common.h"
  28 #include "libavutil/display.h"
  29 #include "libavutil/internal.h"
  30 #include "libavutil/mastering_display_metadata.h"
  31 #include "libavutil/md5.h"
  32 #include "libavutil/opt.h"
  33 #include "libavutil/pixdesc.h"
  34 #include "libavutil/stereo3d.h"
  35
  36 #include "bswapdsp.h"
  37 #include "bytestream.h"
  38 #include "cabac_functions.h"
  39 #include "golomb.h"
  40 #include "hevc.h"
  41 #include "hevc_data.h"
  42 #include "hevc_parse.h"
  43 #include "hevcdec.h"
  44 #include "hwaccel.h"
  45 #include "profiles.h"
  46
  47 const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
  48
  49 /**
  50  * NOTE: Each function hls_foo corresponds to the function foo in the
  51  * specification (HLS stands for High Level Syntax).
  52  */
  53
  54 /**
  55  * Section 5.7
  56  */
  57
  58 /* free everything allocated  by pic_arrays_init() */
  59 static void pic_arrays_free(HEVCContext *s)
  60 {
  61     av_freep(&s->sao);
  62     av_freep(&s->deblock);
  63
  64     av_freep(&s->skip_flag);
  65     av_freep(&s->tab_ct_depth);
  66
  67     av_freep(&s->tab_ipm);
  68     av_freep(&s->cbf_luma);
  69     av_freep(&s->is_pcm);
  70
  71     av_freep(&s->qp_y_tab);
  72     av_freep(&s->tab_slice_address);
  73     av_freep(&s->filter_slice_edges);
  74
  75     av_freep(&s->horizontal_bs);
  76     av_freep(&s->vertical_bs);
  77
  78     av_freep(&s->sh.entry_point_offset);
  79     av_freep(&s->sh.size);
  80     av_freep(&s->sh.offset);
  81
  82     av_buffer_pool_uninit(&s->tab_mvf_pool);
  83     av_buffer_pool_uninit(&s->rpl_tab_pool);
  84 }
  85
  86 /* allocate arrays that depend on frame dimensions */
  87 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
  88 {
  89     int log2_min_cb_size = sps->log2_min_cb_size;
  90     int width            = sps->width;
  91     int height           = sps->height;
  92     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
  93                            ((height >> log2_min_cb_size) + 1);
  94     int ctb_count        = sps->ctb_width * sps->ctb_height;
  95     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
  96
  97     s->bs_width  = (width  >> 2) + 1;
  98     s->bs_height = (height >> 2) + 1;
  99
 100     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
 101     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
 102     if (!s->sao || !s->deblock)
 103         goto fail;
 104
 105     s->skip_flag    = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
 106     s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
 107     if (!s->skip_flag || !s->tab_ct_depth)
 108         goto fail;
 109
 110     s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
 111     s->tab_ipm  = av_mallocz(min_pu_size);
 112     s->is_pcm   = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
 113     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 114         goto fail;
 115
 116     s->filter_slice_edges = av_mallocz(ctb_count);
 117     s->tab_slice_address  = av_malloc_array(pic_size_in_ctb,
 118                                       sizeof(*s->tab_slice_address));
 119     s->qp_y_tab           = av_malloc_array(pic_size_in_ctb,
 120                                       sizeof(*s->qp_y_tab));
 121     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 122         goto fail;
 123
 124     s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
 125     s->vertical_bs   = av_mallocz_array(s->bs_width, s->bs_height);
 126     if (!s->horizontal_bs || !s->vertical_bs)
 127         goto fail;
 128
 129     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 130                                           av_buffer_allocz);
 131     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 132                                           av_buffer_allocz);
 133     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 134         goto fail;
 135
 136     return 0;
 137
 138 fail:
 139     pic_arrays_free(s);
 140     return AVERROR(ENOMEM);
 141 }
 142
 143 static int pred_weight_table(HEVCContext *s, GetBitContext *gb)
 144 {
 145     int i = 0;
 146     int j = 0;
 147     uint8_t luma_weight_l0_flag[16];
 148     uint8_t chroma_weight_l0_flag[16];
 149     uint8_t luma_weight_l1_flag[16];
 150     uint8_t chroma_weight_l1_flag[16];
 151     int luma_log2_weight_denom;
 152
 153     luma_log2_weight_denom = get_ue_golomb_long(gb);
 154     if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7) {
 155         av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
 156         return AVERROR_INVALIDDATA;
 157     }
 158     s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
 159     if (s->ps.sps->chroma_format_idc != 0) {
 160         int64_t chroma_log2_weight_denom = luma_log2_weight_denom + (int64_t)get_se_golomb(gb);
 161         if (chroma_log2_weight_denom < 0 || chroma_log2_weight_denom > 7) {
 162             av_log(s->avctx, AV_LOG_ERROR, "chroma_log2_weight_denom %"PRId64" is invalid\n", chroma_log2_weight_denom);
 163             return AVERROR_INVALIDDATA;
 164         }
 165         s->sh.chroma_log2_weight_denom = chroma_log2_weight_denom;
 166     }
 167
 168     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 169         luma_weight_l0_flag[i] = get_bits1(gb);
 170         if (!luma_weight_l0_flag[i]) {
 171             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 172             s->sh.luma_offset_l0[i] = 0;
 173         }
 174     }
 175     if (s->ps.sps->chroma_format_idc != 0) {
 176         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 177             chroma_weight_l0_flag[i] = get_bits1(gb);
 178     } else {
 179         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 180             chroma_weight_l0_flag[i] = 0;
 181     }
 182     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 183         if (luma_weight_l0_flag[i]) {
 184             int delta_luma_weight_l0 = get_se_golomb(gb);
 185             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 186             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 187         }
 188         if (chroma_weight_l0_flag[i]) {
 189             for (j = 0; j < 2; j++) {
 190                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 191                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 192
 193                 if (   (int8_t)delta_chroma_weight_l0 != delta_chroma_weight_l0
 194                     || delta_chroma_offset_l0 < -(1<<17) || delta_chroma_offset_l0 > (1<<17)) {
 195                     return AVERROR_INVALIDDATA;
 196                 }
 197
 198                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 199                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 200                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 201             }
 202         } else {
 203             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 204             s->sh.chroma_offset_l0[i][0] = 0;
 205             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 206             s->sh.chroma_offset_l0[i][1] = 0;
 207         }
 208     }
 209     if (s->sh.slice_type == HEVC_SLICE_B) {
 210         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 211             luma_weight_l1_flag[i] = get_bits1(gb);
 212             if (!luma_weight_l1_flag[i]) {
 213                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 214                 s->sh.luma_offset_l1[i] = 0;
 215             }
 216         }
 217         if (s->ps.sps->chroma_format_idc != 0) {
 218             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 219                 chroma_weight_l1_flag[i] = get_bits1(gb);
 220         } else {
 221             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 222                 chroma_weight_l1_flag[i] = 0;
 223         }
 224         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 225             if (luma_weight_l1_flag[i]) {
 226                 int delta_luma_weight_l1 = get_se_golomb(gb);
 227                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 228                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 229             }
 230             if (chroma_weight_l1_flag[i]) {
 231                 for (j = 0; j < 2; j++) {
 232                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 233                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 234
 235                     if (   (int8_t)delta_chroma_weight_l1 != delta_chroma_weight_l1
 236                         || delta_chroma_offset_l1 < -(1<<17) || delta_chroma_offset_l1 > (1<<17)) {
 237                         return AVERROR_INVALIDDATA;
 238                     }
 239
 240                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 241                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 242                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 243                 }
 244             } else {
 245                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 246                 s->sh.chroma_offset_l1[i][0] = 0;
 247                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 248                 s->sh.chroma_offset_l1[i][1] = 0;
 249             }
 250         }
 251     }
 252     return 0;
 253 }
 254
 255 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 256 {
 257     const HEVCSPS *sps = s->ps.sps;
 258     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 259     int prev_delta_msb = 0;
 260     unsigned int nb_sps = 0, nb_sh;
 261     int i;
 262
 263     rps->nb_refs = 0;
 264     if (!sps->long_term_ref_pics_present_flag)
 265         return 0;
 266
 267     if (sps->num_long_term_ref_pics_sps > 0)
 268         nb_sps = get_ue_golomb_long(gb);
 269     nb_sh = get_ue_golomb_long(gb);
 270
 271     if (nb_sps > sps->num_long_term_ref_pics_sps)
 272         return AVERROR_INVALIDDATA;
 273     if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
 274         return AVERROR_INVALIDDATA;
 275
 276     rps->nb_refs = nb_sh + nb_sps;
 277
 278     for (i = 0; i < rps->nb_refs; i++) {
 279         uint8_t delta_poc_msb_present;
 280
 281         if (i < nb_sps) {
 282             uint8_t lt_idx_sps = 0;
 283
 284             if (sps->num_long_term_ref_pics_sps > 1)
 285                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 286
 287             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 288             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 289         } else {
 290             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 291             rps->used[i] = get_bits1(gb);
 292         }
 293
 294         delta_poc_msb_present = get_bits1(gb);
 295         if (delta_poc_msb_present) {
 296             int64_t delta = get_ue_golomb_long(gb);
 297             int64_t poc;
 298
 299             if (i && i != nb_sps)
 300                 delta += prev_delta_msb;
 301
 302             poc = rps->poc[i] + s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 303             if (poc != (int32_t)poc)
 304                 return AVERROR_INVALIDDATA;
 305             rps->poc[i] = poc;
 306             prev_delta_msb = delta;
 307         }
 308     }
 309
 310     return 0;
 311 }
 312
 313 static void export_stream_params(AVCodecContext *avctx, const HEVCParamSets *ps,
 314                                  const HEVCSPS *sps)
 315 {
 316     const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
 317     const HEVCWindow *ow = &sps->output_window;
 318     unsigned int num = 0, den = 0;
 319
 320     avctx->pix_fmt             = sps->pix_fmt;
 321     avctx->coded_width         = sps->width;
 322     avctx->coded_height        = sps->height;
 323     avctx->width               = sps->width  - ow->left_offset - ow->right_offset;
 324     avctx->height              = sps->height - ow->top_offset  - ow->bottom_offset;
 325     avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 326     avctx->profile             = sps->ptl.general_ptl.profile_idc;
 327     avctx->level               = sps->ptl.general_ptl.level_idc;
 328
 329     ff_set_sar(avctx, sps->vui.sar);
 330
 331     if (sps->vui.video_signal_type_present_flag)
 332         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 333                                                             : AVCOL_RANGE_MPEG;
 334     else
 335         avctx->color_range = AVCOL_RANGE_MPEG;
 336
 337     if (sps->vui.colour_description_present_flag) {
 338         avctx->color_primaries = sps->vui.colour_primaries;
 339         avctx->color_trc       = sps->vui.transfer_characteristic;
 340         avctx->colorspace      = sps->vui.matrix_coeffs;
 341     } else {
 342         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 343         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 344         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 345     }
 346
 347     if (vps->vps_timing_info_present_flag) {
 348         num = vps->vps_num_units_in_tick;
 349         den = vps->vps_time_scale;
 350     } else if (sps->vui.vui_timing_info_present_flag) {
 351         num = sps->vui.vui_num_units_in_tick;
 352         den = sps->vui.vui_time_scale;
 353     }
 354
 355     if (num != 0 && den != 0)
 356         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
 357                   num, den, 1 << 30);
 358 }
 359
 360 static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
 361 {
 362 #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + \
 363                      CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \
 364                      CONFIG_HEVC_NVDEC_HWACCEL + \
 365                      CONFIG_HEVC_VAAPI_HWACCEL + \
 366                      CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \
 367                      CONFIG_HEVC_VDPAU_HWACCEL)
 368     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
 369
 370     switch (sps->pix_fmt) {
 371     case AV_PIX_FMT_YUV420P:
 372     case AV_PIX_FMT_YUVJ420P:
 373 #if CONFIG_HEVC_DXVA2_HWACCEL
 374         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
 375 #endif
 376 #if CONFIG_HEVC_D3D11VA_HWACCEL
 377         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
 378         *fmt++ = AV_PIX_FMT_D3D11;
 379 #endif
 380 #if CONFIG_HEVC_VAAPI_HWACCEL
 381         *fmt++ = AV_PIX_FMT_VAAPI;
 382 #endif
 383 #if CONFIG_HEVC_VDPAU_HWACCEL
 384         *fmt++ = AV_PIX_FMT_VDPAU;
 385 #endif
 386 #if CONFIG_HEVC_NVDEC_HWACCEL
 387         *fmt++ = AV_PIX_FMT_CUDA;
 388 #endif
 389 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
 390         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
 391 #endif
 392         break;
 393     case AV_PIX_FMT_YUV420P10:
 394 #if CONFIG_HEVC_DXVA2_HWACCEL
 395         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
 396 #endif
 397 #if CONFIG_HEVC_D3D11VA_HWACCEL
 398         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
 399         *fmt++ = AV_PIX_FMT_D3D11;
 400 #endif
 401 #if CONFIG_HEVC_VAAPI_HWACCEL
 402         *fmt++ = AV_PIX_FMT_VAAPI;
 403 #endif
 404 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
 405         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
 406 #endif
 407 #if CONFIG_HEVC_NVDEC_HWACCEL
 408         *fmt++ = AV_PIX_FMT_CUDA;
 409 #endif
 410         break;
 411     case AV_PIX_FMT_YUV444P:
 412 #if CONFIG_HEVC_VDPAU_HWACCEL
 413         *fmt++ = AV_PIX_FMT_VDPAU;
 414 #endif
 415 #if CONFIG_HEVC_NVDEC_HWACCEL
 416         *fmt++ = AV_PIX_FMT_CUDA;
 417 #endif
 418         break;
 419     case AV_PIX_FMT_YUV420P12:
 420     case AV_PIX_FMT_YUV444P10:
 421     case AV_PIX_FMT_YUV444P12:
 422 #if CONFIG_HEVC_NVDEC_HWACCEL
 423         *fmt++ = AV_PIX_FMT_CUDA;
 424 #endif
 425         break;
 426     }
 427
 428     *fmt++ = sps->pix_fmt;
 429     *fmt = AV_PIX_FMT_NONE;
 430
 431     return ff_thread_get_format(s->avctx, pix_fmts);
 432 }
 433
 434 static int set_sps(HEVCContext *s, const HEVCSPS *sps,
 435                    enum AVPixelFormat pix_fmt)
 436 {
 437     int ret, i;
 438
 439     pic_arrays_free(s);
 440     s->ps.sps = NULL;
 441     s->ps.vps = NULL;
 442
 443     if (!sps)
 444         return 0;
 445
 446     ret = pic_arrays_init(s, sps);
 447     if (ret < 0)
 448         goto fail;
 449
 450     export_stream_params(s->avctx, &s->ps, sps);
 451
 452     s->avctx->pix_fmt = pix_fmt;
 453
 454     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 455     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 456     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 457
 458     for (i = 0; i < 3; i++) {
 459         av_freep(&s->sao_pixel_buffer_h[i]);
 460         av_freep(&s->sao_pixel_buffer_v[i]);
 461     }
 462
 463     if (sps->sao_enabled && !s->avctx->hwaccel) {
 464         int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
 465         int c_idx;
 466
 467         for(c_idx = 0; c_idx < c_count; c_idx++) {
 468             int w = sps->width >> sps->hshift[c_idx];
 469             int h = sps->height >> sps->vshift[c_idx];
 470             s->sao_pixel_buffer_h[c_idx] =
 471                 av_malloc((w * 2 * sps->ctb_height) <<
 472                           sps->pixel_shift);
 473             s->sao_pixel_buffer_v[c_idx] =
 474                 av_malloc((h * 2 * sps->ctb_width) <<
 475                           sps->pixel_shift);
 476         }
 477     }
 478
 479     s->ps.sps = sps;
 480     s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
 481
 482     return 0;
 483
 484 fail:
 485     pic_arrays_free(s);
 486     s->ps.sps = NULL;
 487     return ret;
 488 }
 489
 490 static int hls_slice_header(HEVCContext *s)
 491 {
 492     GetBitContext *gb = &s->HEVClc->gb;
 493     SliceHeader *sh   = &s->sh;
 494     int i, ret;
 495
 496     // Coded parameters
 497     sh->first_slice_in_pic_flag = get_bits1(gb);
 498     if (s->ref && sh->first_slice_in_pic_flag) {
 499         av_log(s->avctx, AV_LOG_ERROR, "Two slices reporting being the first in the same frame.\n");
 500         return 1; // This slice will be skiped later, do not corrupt state
 501     }
 502
 503     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 504         s->seq_decode = (s->seq_decode + 1) & 0xff;
 505         s->max_ra     = INT_MAX;
 506         if (IS_IDR(s))
 507             ff_hevc_clear_refs(s);
 508     }
 509     sh->no_output_of_prior_pics_flag = 0;
 510     if (IS_IRAP(s))
 511         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 512
 513     sh->pps_id = get_ue_golomb_long(gb);
 514     if (sh->pps_id >= HEVC_MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
 515         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 516         return AVERROR_INVALIDDATA;
 517     }
 518     if (!sh->first_slice_in_pic_flag &&
 519         s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
 520         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 521         return AVERROR_INVALIDDATA;
 522     }
 523     s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
 524     if (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos == 1)
 525         sh->no_output_of_prior_pics_flag = 1;
 526
 527     if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
 528         const HEVCSPS *sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
 529         const HEVCSPS *last_sps = s->ps.sps;
 530         enum AVPixelFormat pix_fmt;
 531
 532         if (last_sps && IS_IRAP(s) && s->nal_unit_type != HEVC_NAL_CRA_NUT) {
 533             if (sps->width != last_sps->width || sps->height != last_sps->height ||
 534                 sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering !=
 535                 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
 536                 sh->no_output_of_prior_pics_flag = 0;
 537         }
 538         ff_hevc_clear_refs(s);
 539
 540         ret = set_sps(s, sps, sps->pix_fmt);
 541         if (ret < 0)
 542             return ret;
 543
 544         pix_fmt = get_format(s, sps);
 545         if (pix_fmt < 0)
 546             return pix_fmt;
 547         s->avctx->pix_fmt = pix_fmt;
 548
 549         s->seq_decode = (s->seq_decode + 1) & 0xff;
 550         s->max_ra     = INT_MAX;
 551     }
 552
 553     sh->dependent_slice_segment_flag = 0;
 554     if (!sh->first_slice_in_pic_flag) {
 555         int slice_address_length;
 556
 557         if (s->ps.pps->dependent_slice_segments_enabled_flag)
 558             sh->dependent_slice_segment_flag = get_bits1(gb);
 559
 560         slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
 561                                             s->ps.sps->ctb_height);
 562         sh->slice_segment_addr = get_bitsz(gb, slice_address_length);
 563         if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
 564             av_log(s->avctx, AV_LOG_ERROR,
 565                    "Invalid slice segment address: %u.\n",
 566                    sh->slice_segment_addr);
 567             return AVERROR_INVALIDDATA;
 568         }
 569
 570         if (!sh->dependent_slice_segment_flag) {
 571             sh->slice_addr = sh->slice_segment_addr;
 572             s->slice_idx++;
 573         }
 574     } else {
 575         sh->slice_segment_addr = sh->slice_addr = 0;
 576         s->slice_idx           = 0;
 577         s->slice_initialized   = 0;
 578     }
 579
 580     if (!sh->dependent_slice_segment_flag) {
 581         s->slice_initialized = 0;
 582
 583         for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
 584             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 585
 586         sh->slice_type = get_ue_golomb_long(gb);
 587         if (!(sh->slice_type == HEVC_SLICE_I ||
 588               sh->slice_type == HEVC_SLICE_P ||
 589               sh->slice_type == HEVC_SLICE_B)) {
 590             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 591                    sh->slice_type);
 592             return AVERROR_INVALIDDATA;
 593         }
 594         if (IS_IRAP(s) && sh->slice_type != HEVC_SLICE_I) {
 595             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 596             return AVERROR_INVALIDDATA;
 597         }
 598
 599         // when flag is not present, picture is inferred to be output
 600         sh->pic_output_flag = 1;
 601         if (s->ps.pps->output_flag_present_flag)
 602             sh->pic_output_flag = get_bits1(gb);
 603
 604         if (s->ps.sps->separate_colour_plane_flag)
 605             sh->colour_plane_id = get_bits(gb, 2);
 606
 607         if (!IS_IDR(s)) {
 608             int poc, pos;
 609
 610             sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
 611             poc = ff_hevc_compute_poc(s->ps.sps, s->pocTid0, sh->pic_order_cnt_lsb, s->nal_unit_type);
 612             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 613                 av_log(s->avctx, AV_LOG_WARNING,
 614                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 615                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 616                     return AVERROR_INVALIDDATA;
 617                 poc = s->poc;
 618             }
 619             s->poc = poc;
 620
 621             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
 622             pos = get_bits_left(gb);
 623             if (!sh->short_term_ref_pic_set_sps_flag) {
 624                 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
 625                 if (ret < 0)
 626                     return ret;
 627
 628                 sh->short_term_rps = &sh->slice_rps;
 629             } else {
 630                 int numbits, rps_idx;
 631
 632                 if (!s->ps.sps->nb_st_rps) {
 633                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 634                     return AVERROR_INVALIDDATA;
 635                 }
 636
 637                 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
 638                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 639                 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
 640             }
 641             sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
 642
 643             pos = get_bits_left(gb);
 644             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 645             if (ret < 0) {
 646                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 647                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 648                     return AVERROR_INVALIDDATA;
 649             }
 650             sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
 651
 652             if (s->ps.sps->sps_temporal_mvp_enabled_flag)
 653                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 654             else
 655                 sh->slice_temporal_mvp_enabled_flag = 0;
 656         } else {
 657             s->sh.short_term_rps = NULL;
 658             s->poc               = 0;
 659         }
 660
 661         /* 8.3.1 */
 662         if (sh->first_slice_in_pic_flag && s->temporal_id == 0 &&
 663             s->nal_unit_type != HEVC_NAL_TRAIL_N &&
 664             s->nal_unit_type != HEVC_NAL_TSA_N   &&
 665             s->nal_unit_type != HEVC_NAL_STSA_N  &&
 666             s->nal_unit_type != HEVC_NAL_RADL_N  &&
 667             s->nal_unit_type != HEVC_NAL_RADL_R  &&
 668             s->nal_unit_type != HEVC_NAL_RASL_N  &&
 669             s->nal_unit_type != HEVC_NAL_RASL_R)
 670             s->pocTid0 = s->poc;
 671
 672         if (s->ps.sps->sao_enabled) {
 673             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 674             if (s->ps.sps->chroma_format_idc) {
 675                 sh->slice_sample_adaptive_offset_flag[1] =
 676                 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 677             }
 678         } else {
 679             sh->slice_sample_adaptive_offset_flag[0] = 0;
 680             sh->slice_sample_adaptive_offset_flag[1] = 0;
 681             sh->slice_sample_adaptive_offset_flag[2] = 0;
 682         }
 683
 684         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 685         if (sh->slice_type == HEVC_SLICE_P || sh->slice_type == HEVC_SLICE_B) {
 686             int nb_refs;
 687
 688             sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
 689             if (sh->slice_type == HEVC_SLICE_B)
 690                 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
 691
 692             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 693                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 694                 if (sh->slice_type == HEVC_SLICE_B)
 695                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 696             }
 697             if (sh->nb_refs[L0] > HEVC_MAX_REFS || sh->nb_refs[L1] > HEVC_MAX_REFS) {
 698                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 699                        sh->nb_refs[L0], sh->nb_refs[L1]);
 700                 return AVERROR_INVALIDDATA;
 701             }
 702
 703             sh->rpl_modification_flag[0] = 0;
 704             sh->rpl_modification_flag[1] = 0;
 705             nb_refs = ff_hevc_frame_nb_refs(s);
 706             if (!nb_refs) {
 707                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 708                 return AVERROR_INVALIDDATA;
 709             }
 710
 711             if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
 712                 sh->rpl_modification_flag[0] = get_bits1(gb);
 713                 if (sh->rpl_modification_flag[0]) {
 714                     for (i = 0; i < sh->nb_refs[L0]; i++)
 715                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 716                 }
 717
 718                 if (sh->slice_type == HEVC_SLICE_B) {
 719                     sh->rpl_modification_flag[1] = get_bits1(gb);
 720                     if (sh->rpl_modification_flag[1] == 1)
 721                         for (i = 0; i < sh->nb_refs[L1]; i++)
 722                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 723                 }
 724             }
 725
 726             if (sh->slice_type == HEVC_SLICE_B)
 727                 sh->mvd_l1_zero_flag = get_bits1(gb);
 728
 729             if (s->ps.pps->cabac_init_present_flag)
 730                 sh->cabac_init_flag = get_bits1(gb);
 731             else
 732                 sh->cabac_init_flag = 0;
 733
 734             sh->collocated_ref_idx = 0;
 735             if (sh->slice_temporal_mvp_enabled_flag) {
 736                 sh->collocated_list = L0;
 737                 if (sh->slice_type == HEVC_SLICE_B)
 738                     sh->collocated_list = !get_bits1(gb);
 739
 740                 if (sh->nb_refs[sh->collocated_list] > 1) {
 741                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 742                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 743                         av_log(s->avctx, AV_LOG_ERROR,
 744                                "Invalid collocated_ref_idx: %d.\n",
 745                                sh->collocated_ref_idx);
 746                         return AVERROR_INVALIDDATA;
 747                     }
 748                 }
 749             }
 750
 751             if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == HEVC_SLICE_P) ||
 752                 (s->ps.pps->weighted_bipred_flag && sh->slice_type == HEVC_SLICE_B)) {
 753                 int ret = pred_weight_table(s, gb);
 754                 if (ret < 0)
 755                     return ret;
 756             }
 757
 758             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 759             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 760                 av_log(s->avctx, AV_LOG_ERROR,
 761                        "Invalid number of merging MVP candidates: %d.\n",
 762                        sh->max_num_merge_cand);
 763                 return AVERROR_INVALIDDATA;
 764             }
 765         }
 766
 767         sh->slice_qp_delta = get_se_golomb(gb);
 768
 769         if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 770             sh->slice_cb_qp_offset = get_se_golomb(gb);
 771             sh->slice_cr_qp_offset = get_se_golomb(gb);
 772         } else {
 773             sh->slice_cb_qp_offset = 0;
 774             sh->slice_cr_qp_offset = 0;
 775         }
 776
 777         if (s->ps.pps->chroma_qp_offset_list_enabled_flag)
 778             sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
 779         else
 780             sh->cu_chroma_qp_offset_enabled_flag = 0;
 781
 782         if (s->ps.pps->deblocking_filter_control_present_flag) {
 783             int deblocking_filter_override_flag = 0;
 784
 785             if (s->ps.pps->deblocking_filter_override_enabled_flag)
 786                 deblocking_filter_override_flag = get_bits1(gb);
 787
 788             if (deblocking_filter_override_flag) {
 789                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 790                 if (!sh->disable_deblocking_filter_flag) {
 791                     int beta_offset_div2 = get_se_golomb(gb);
 792                     int tc_offset_div2   = get_se_golomb(gb) ;
 793                     if (beta_offset_div2 < -6 || beta_offset_div2 > 6 ||
 794                         tc_offset_div2   < -6 || tc_offset_div2   > 6) {
 795                         av_log(s->avctx, AV_LOG_ERROR,
 796                             "Invalid deblock filter offsets: %d, %d\n",
 797                             beta_offset_div2, tc_offset_div2);
 798                         return AVERROR_INVALIDDATA;
 799                     }
 800                     sh->beta_offset = beta_offset_div2 * 2;
 801                     sh->tc_offset   =   tc_offset_div2 * 2;
 802                 }
 803             } else {
 804                 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
 805                 sh->beta_offset                    = s->ps.pps->beta_offset;
 806                 sh->tc_offset                      = s->ps.pps->tc_offset;
 807             }
 808         } else {
 809             sh->disable_deblocking_filter_flag = 0;
 810             sh->beta_offset                    = 0;
 811             sh->tc_offset                      = 0;
 812         }
 813
 814         if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
 815             (sh->slice_sample_adaptive_offset_flag[0] ||
 816              sh->slice_sample_adaptive_offset_flag[1] ||
 817              !sh->disable_deblocking_filter_flag)) {
 818             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 819         } else {
 820             sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
 821         }
 822     } else if (!s->slice_initialized) {
 823         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 824         return AVERROR_INVALIDDATA;
 825     }
 826
 827     sh->num_entry_point_offsets = 0;
 828     if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
 829         unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
 830         // It would be possible to bound this tighter but this here is simpler
 831         if (num_entry_point_offsets > get_bits_left(gb)) {
 832             av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
 833             return AVERROR_INVALIDDATA;
 834         }
 835
 836         sh->num_entry_point_offsets = num_entry_point_offsets;
 837         if (sh->num_entry_point_offsets > 0) {
 838             int offset_len = get_ue_golomb_long(gb) + 1;
 839
 840             if (offset_len < 1 || offset_len > 32) {
 841                 sh->num_entry_point_offsets = 0;
 842                 av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
 843                 return AVERROR_INVALIDDATA;
 844             }
 845
 846             av_freep(&sh->entry_point_offset);
 847             av_freep(&sh->offset);
 848             av_freep(&sh->size);
 849             sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(unsigned));
 850             sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 851             sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 852             if (!sh->entry_point_offset || !sh->offset || !sh->size) {
 853                 sh->num_entry_point_offsets = 0;
 854                 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
 855                 return AVERROR(ENOMEM);
 856             }
 857             for (i = 0; i < sh->num_entry_point_offsets; i++) {
 858                 unsigned val = get_bits_long(gb, offset_len);
 859                 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
 860             }
 861             if (s->threads_number > 1 && (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1)) {
 862                 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
 863                 s->threads_number = 1;
 864             } else
 865                 s->enable_parallel_tiles = 0;
 866         } else
 867             s->enable_parallel_tiles = 0;
 868     }
 869
 870     if (s->ps.pps->slice_header_extension_present_flag) {
 871         unsigned int length = get_ue_golomb_long(gb);
 872         if (length*8LL > get_bits_left(gb)) {
 873             av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
 874             return AVERROR_INVALIDDATA;
 875         }
 876         for (i = 0; i < length; i++)
 877             skip_bits(gb, 8);  // slice_header_extension_data_byte
 878     }
 879
 880     // Inferred parameters
 881     sh->slice_qp = 26U + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 882     if (sh->slice_qp > 51 ||
 883         sh->slice_qp < -s->ps.sps->qp_bd_offset) {
 884         av_log(s->avctx, AV_LOG_ERROR,
 885                "The slice_qp %d is outside the valid range "
 886                "[%d, 51].\n",
 887                sh->slice_qp,
 888                -s->ps.sps->qp_bd_offset);
 889         return AVERROR_INVALIDDATA;
 890     }
 891
 892     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 893
 894     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
 895         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
 896         return AVERROR_INVALIDDATA;
 897     }
 898
 899     if (get_bits_left(gb) < 0) {
 900         av_log(s->avctx, AV_LOG_ERROR,
 901                "Overread slice header by %d bits\n", -get_bits_left(gb));
 902         return AVERROR_INVALIDDATA;
 903     }
 904
 905     s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
 906
 907     if (!s->ps.pps->cu_qp_delta_enabled_flag)
 908         s->HEVClc->qp_y = s->sh.slice_qp;
 909
 910     s->slice_initialized = 1;
 911     s->HEVClc->tu.cu_qp_offset_cb = 0;
 912     s->HEVClc->tu.cu_qp_offset_cr = 0;
 913
 914     return 0;
 915 }
 916
 917 #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
 918
 919 #define SET_SAO(elem, value)                            \
 920 do {                                                    \
 921     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 922         sao->elem = value;                              \
 923     else if (sao_merge_left_flag)                       \
 924         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 925     else if (sao_merge_up_flag)                         \
 926         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 927     else                                                \
 928         sao->elem = 0;                                  \
 929 } while (0)
 930
 931 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 932 {
 933     HEVCLocalContext *lc    = s->HEVClc;
 934     int sao_merge_left_flag = 0;
 935     int sao_merge_up_flag   = 0;
 936     SAOParams *sao          = &CTB(s->sao, rx, ry);
 937     int c_idx, i;
 938
 939     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 940         s->sh.slice_sample_adaptive_offset_flag[1]) {
 941         if (rx > 0) {
 942             if (lc->ctb_left_flag)
 943                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 944         }
 945         if (ry > 0 && !sao_merge_left_flag) {
 946             if (lc->ctb_up_flag)
 947                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 948         }
 949     }
 950
 951     for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
 952         int log2_sao_offset_scale = c_idx == 0 ? s->ps.pps->log2_sao_offset_scale_luma :
 953                                                  s->ps.pps->log2_sao_offset_scale_chroma;
 954
 955         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 956             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 957             continue;
 958         }
 959
 960         if (c_idx == 2) {
 961             sao->type_idx[2] = sao->type_idx[1];
 962             sao->eo_class[2] = sao->eo_class[1];
 963         } else {
 964             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 965         }
 966
 967         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 968             continue;
 969
 970         for (i = 0; i < 4; i++)
 971             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 972
 973         if (sao->type_idx[c_idx] == SAO_BAND) {
 974             for (i = 0; i < 4; i++) {
 975                 if (sao->offset_abs[c_idx][i]) {
 976                     SET_SAO(offset_sign[c_idx][i],
 977                             ff_hevc_sao_offset_sign_decode(s));
 978                 } else {
 979                     sao->offset_sign[c_idx][i] = 0;
 980                 }
 981             }
 982             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 983         } else if (c_idx != 2) {
 984             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 985         }
 986
 987         // Inferred parameters
 988         sao->offset_val[c_idx][0] = 0;
 989         for (i = 0; i < 4; i++) {
 990             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
 991             if (sao->type_idx[c_idx] == SAO_EDGE) {
 992                 if (i > 1)
 993                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 994             } else if (sao->offset_sign[c_idx][i]) {
 995                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 996             }
 997             sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale;
 998         }
 999     }
1000 }
1001
1002 #undef SET_SAO
1003 #undef CTB
1004
1005 static int hls_cross_component_pred(HEVCContext *s, int idx) {
1006     HEVCLocalContext *lc    = s->HEVClc;
1007     int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
1008
1009     if (log2_res_scale_abs_plus1 !=  0) {
1010         int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
1011         lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
1012                                (1 - 2 * res_scale_sign_flag);
1013     } else {
1014         lc->tu.res_scale_val = 0;
1015     }
1016
1017
1018     return 0;
1019 }
1020
1021 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1022                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1023                               int log2_cb_size, int log2_trafo_size,
1024                               int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
1025 {
1026     HEVCLocalContext *lc = s->HEVClc;
1027     const int log2_trafo_size_c = log2_trafo_size - s->ps.sps->hshift[1];
1028     int i;
1029
1030     if (lc->cu.pred_mode == MODE_INTRA) {
1031         int trafo_size = 1 << log2_trafo_size;
1032         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1033
1034         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1035     }
1036
1037     if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
1038         (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1039         int scan_idx   = SCAN_DIAG;
1040         int scan_idx_c = SCAN_DIAG;
1041         int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
1042                          (s->ps.sps->chroma_format_idc == 2 &&
1043                          (cbf_cb[1] || cbf_cr[1]));
1044
1045         if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1046             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1047             if (lc->tu.cu_qp_delta != 0)
1048                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1049                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1050             lc->tu.is_cu_qp_delta_coded = 1;
1051
1052             if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
1053                 lc->tu.cu_qp_delta >  (25 + s->ps.sps->qp_bd_offset / 2)) {
1054                 av_log(s->avctx, AV_LOG_ERROR,
1055                        "The cu_qp_delta %d is outside the valid range "
1056                        "[%d, %d].\n",
1057                        lc->tu.cu_qp_delta,
1058                        -(26 + s->ps.sps->qp_bd_offset / 2),
1059                         (25 + s->ps.sps->qp_bd_offset / 2));
1060                 return AVERROR_INVALIDDATA;
1061             }
1062
1063             ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
1064         }
1065
1066         if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
1067             !lc->cu.cu_transquant_bypass_flag  &&  !lc->tu.is_cu_chroma_qp_offset_coded) {
1068             int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
1069             if (cu_chroma_qp_offset_flag) {
1070                 int cu_chroma_qp_offset_idx  = 0;
1071                 if (s->ps.pps->chroma_qp_offset_list_len_minus1 > 0) {
1072                     cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
1073                     av_log(s->avctx, AV_LOG_ERROR,
1074                         "cu_chroma_qp_offset_idx not yet tested.\n");
1075                 }
1076                 lc->tu.cu_qp_offset_cb = s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
1077                 lc->tu.cu_qp_offset_cr = s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
1078             } else {
1079                 lc->tu.cu_qp_offset_cb = 0;
1080                 lc->tu.cu_qp_offset_cr = 0;
1081             }
1082             lc->tu.is_cu_chroma_qp_offset_coded = 1;
1083         }
1084
1085         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1086             if (lc->tu.intra_pred_mode >= 6 &&
1087                 lc->tu.intra_pred_mode <= 14) {
1088                 scan_idx = SCAN_VERT;
1089             } else if (lc->tu.intra_pred_mode >= 22 &&
1090                        lc->tu.intra_pred_mode <= 30) {
1091                 scan_idx = SCAN_HORIZ;
1092             }
1093
1094             if (lc->tu.intra_pred_mode_c >=  6 &&
1095                 lc->tu.intra_pred_mode_c <= 14) {
1096                 scan_idx_c = SCAN_VERT;
1097             } else if (lc->tu.intra_pred_mode_c >= 22 &&
1098                        lc->tu.intra_pred_mode_c <= 30) {
1099                 scan_idx_c = SCAN_HORIZ;
1100             }
1101         }
1102
1103         lc->tu.cross_pf = 0;
1104
1105         if (cbf_luma)
1106             ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1107         if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1108             int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1109             int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1110             lc->tu.cross_pf  = (s->ps.pps->cross_component_prediction_enabled_flag && cbf_luma &&
1111                                 (lc->cu.pred_mode == MODE_INTER ||
1112                                  (lc->tu.chroma_mode_c ==  4)));
1113
1114             if (lc->tu.cross_pf) {
1115                 hls_cross_component_pred(s, 0);
1116             }
1117             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1118                 if (lc->cu.pred_mode == MODE_INTRA) {
1119                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1120                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
1121                 }
1122                 if (cbf_cb[i])
1123                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1124                                                 log2_trafo_size_c, scan_idx_c, 1);
1125                 else
1126                     if (lc->tu.cross_pf) {
1127                         ptrdiff_t stride = s->frame->linesize[1];
1128                         int hshift = s->ps.sps->hshift[1];
1129                         int vshift = s->ps.sps->vshift[1];
1130                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1131                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1132                         int size = 1 << log2_trafo_size_c;
1133
1134                         uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1135                                                               ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1136                         for (i = 0; i < (size * size); i++) {
1137                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1138                         }
1139                         s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
1140                     }
1141             }
1142
1143             if (lc->tu.cross_pf) {
1144                 hls_cross_component_pred(s, 1);
1145             }
1146             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1147                 if (lc->cu.pred_mode == MODE_INTRA) {
1148                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1149                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1150                 }
1151                 if (cbf_cr[i])
1152                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1153                                                 log2_trafo_size_c, scan_idx_c, 2);
1154                 else
1155                     if (lc->tu.cross_pf) {
1156                         ptrdiff_t stride = s->frame->linesize[2];
1157                         int hshift = s->ps.sps->hshift[2];
1158                         int vshift = s->ps.sps->vshift[2];
1159                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1160                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1161                         int size = 1 << log2_trafo_size_c;
1162
1163                         uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1164                                                           ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1165                         for (i = 0; i < (size * size); i++) {
1166                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1167                         }
1168                         s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
1169                     }
1170             }
1171         } else if (s->ps.sps->chroma_format_idc && blk_idx == 3) {
1172             int trafo_size_h = 1 << (log2_trafo_size + 1);
1173             int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1174             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1175                 if (lc->cu.pred_mode == MODE_INTRA) {
1176                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1177                                                     trafo_size_h, trafo_size_v);
1178                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1179                 }
1180                 if (cbf_cb[i])
1181                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1182                                                 log2_trafo_size, scan_idx_c, 1);
1183             }
1184             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1185                 if (lc->cu.pred_mode == MODE_INTRA) {
1186                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1187                                                 trafo_size_h, trafo_size_v);
1188                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1189                 }
1190                 if (cbf_cr[i])
1191                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1192                                                 log2_trafo_size, scan_idx_c, 2);
1193             }
1194         }
1195     } else if (s->ps.sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
1196         if (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3) {
1197             int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1198             int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1199             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1200             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1201             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1202             if (s->ps.sps->chroma_format_idc == 2) {
1203                 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1204                                                 trafo_size_h, trafo_size_v);
1205                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1206                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1207             }
1208         } else if (blk_idx == 3) {
1209             int trafo_size_h = 1 << (log2_trafo_size + 1);
1210             int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1211             ff_hevc_set_neighbour_available(s, xBase, yBase,
1212                                             trafo_size_h, trafo_size_v);
1213             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1214             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1215             if (s->ps.sps->chroma_format_idc == 2) {
1216                 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1217                                                 trafo_size_h, trafo_size_v);
1218                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1219                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
1220             }
1221         }
1222     }
1223
1224     return 0;
1225 }
1226
1227 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1228 {
1229     int cb_size          = 1 << log2_cb_size;
1230     int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1231
1232     int min_pu_width     = s->ps.sps->min_pu_width;
1233     int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1234     int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1235     int i, j;
1236
1237     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1238         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1239             s->is_pcm[i + j * min_pu_width] = 2;
1240 }
1241
1242 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1243                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1244                               int log2_cb_size, int log2_trafo_size,
1245                               int trafo_depth, int blk_idx,
1246                               const int *base_cbf_cb, const int *base_cbf_cr)
1247 {
1248     HEVCLocalContext *lc = s->HEVClc;
1249     uint8_t split_transform_flag;
1250     int cbf_cb[2];
1251     int cbf_cr[2];
1252     int ret;
1253
1254     cbf_cb[0] = base_cbf_cb[0];
1255     cbf_cb[1] = base_cbf_cb[1];
1256     cbf_cr[0] = base_cbf_cr[0];
1257     cbf_cr[1] = base_cbf_cr[1];
1258
1259     if (lc->cu.intra_split_flag) {
1260         if (trafo_depth == 1) {
1261             lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[blk_idx];
1262             if (s->ps.sps->chroma_format_idc == 3) {
1263                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1264                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[blk_idx];
1265             } else {
1266                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1267                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1268             }
1269         }
1270     } else {
1271         lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[0];
1272         lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1273         lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1274     }
1275
1276     if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1277         log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
1278         trafo_depth     < lc->cu.max_trafo_depth       &&
1279         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1280         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1281     } else {
1282         int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1283                           lc->cu.pred_mode == MODE_INTER &&
1284                           lc->cu.part_mode != PART_2Nx2N &&
1285                           trafo_depth == 0;
1286
1287         split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1288                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1289                                inter_split;
1290     }
1291
1292     if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1293         if (trafo_depth == 0 || cbf_cb[0]) {
1294             cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1295             if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1296                 cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1297             }
1298         }
1299
1300         if (trafo_depth == 0 || cbf_cr[0]) {
1301             cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1302             if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1303                 cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1304             }
1305         }
1306     }
1307
1308     if (split_transform_flag) {
1309         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1310         const int x1 = x0 + trafo_size_split;
1311         const int y1 = y0 + trafo_size_split;
1312
1313 #define SUBDIVIDE(x, y, idx)                                                    \
1314 do {                                                                            \
1315     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1316                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1317                              cbf_cb, cbf_cr);                                   \
1318     if (ret < 0)                                                                \
1319         return ret;                                                             \
1320 } while (0)
1321
1322         SUBDIVIDE(x0, y0, 0);
1323         SUBDIVIDE(x1, y0, 1);
1324         SUBDIVIDE(x0, y1, 2);
1325         SUBDIVIDE(x1, y1, 3);
1326
1327 #undef SUBDIVIDE
1328     } else {
1329         int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
1330         int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1331         int min_tu_width     = s->ps.sps->min_tb_width;
1332         int cbf_luma         = 1;
1333
1334         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1335             cbf_cb[0] || cbf_cr[0] ||
1336             (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1337             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1338         }
1339
1340         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1341                                  log2_cb_size, log2_trafo_size,
1342                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1343         if (ret < 0)
1344             return ret;
1345         // TODO: store cbf_luma somewhere else
1346         if (cbf_luma) {
1347             int i, j;
1348             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1349                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1350                     int x_tu = (x0 + j) >> log2_min_tu_size;
1351                     int y_tu = (y0 + i) >> log2_min_tu_size;
1352                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1353                 }
1354         }
1355         if (!s->sh.disable_deblocking_filter_flag) {
1356             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1357             if (s->ps.pps->transquant_bypass_enable_flag &&
1358                 lc->cu.cu_transquant_bypass_flag)
1359                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1360         }
1361     }
1362     return 0;
1363 }
1364
1365 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1366 {
1367     HEVCLocalContext *lc = s->HEVClc;
1368     GetBitContext gb;
1369     int cb_size   = 1 << log2_cb_size;
1370     ptrdiff_t stride0 = s->frame->linesize[0];
1371     ptrdiff_t stride1 = s->frame->linesize[1];
1372     ptrdiff_t stride2 = s->frame->linesize[2];
1373     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1374     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1375     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
1376
1377     int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth +
1378                          (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) +
1379                           ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) *
1380                           s->ps.sps->pcm.bit_depth_chroma;
1381     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1382     int ret;
1383
1384     if (!s->sh.disable_deblocking_filter_flag)
1385         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1386
1387     ret = init_get_bits(&gb, pcm, length);
1388     if (ret < 0)
1389         return ret;
1390
1391     s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size,     &gb, s->ps.sps->pcm.bit_depth);
1392     if (s->ps.sps->chroma_format_idc) {
1393         s->hevcdsp.put_pcm(dst1, stride1,
1394                            cb_size >> s->ps.sps->hshift[1],
1395                            cb_size >> s->ps.sps->vshift[1],
1396                            &gb, s->ps.sps->pcm.bit_depth_chroma);
1397         s->hevcdsp.put_pcm(dst2, stride2,
1398                            cb_size >> s->ps.sps->hshift[2],
1399                            cb_size >> s->ps.sps->vshift[2],
1400                            &gb, s->ps.sps->pcm.bit_depth_chroma);
1401     }
1402
1403     return 0;
1404 }
1405
1406 /**
1407  * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1408  *
1409  * @param s HEVC decoding context
1410  * @param dst target buffer for block data at block position
1411  * @param dststride stride of the dst buffer
1412  * @param ref reference picture buffer at origin (0, 0)
1413  * @param mv motion vector (relative to block position) to get pixel data from
1414  * @param x_off horizontal position of block from origin (0, 0)
1415  * @param y_off vertical position of block from origin (0, 0)
1416  * @param block_w width of block
1417  * @param block_h height of block
1418  * @param luma_weight weighting factor applied to the luma prediction
1419  * @param luma_offset additive offset applied to the luma prediction value
1420  */
1421
1422 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1423                         AVFrame *ref, const Mv *mv, int x_off, int y_off,
1424                         int block_w, int block_h, int luma_weight, int luma_offset)
1425 {
1426     HEVCLocalContext *lc = s->HEVClc;
1427     uint8_t *src         = ref->data[0];
1428     ptrdiff_t srcstride  = ref->linesize[0];
1429     int pic_width        = s->ps.sps->width;
1430     int pic_height       = s->ps.sps->height;
1431     int mx               = mv->x & 3;
1432     int my               = mv->y & 3;
1433     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1434                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1435     int idx              = ff_hevc_pel_weight[block_w];
1436
1437     x_off += mv->x >> 2;
1438     y_off += mv->y >> 2;
1439     src   += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1440
1441     if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1442         x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1443         y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1444         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1445         int offset     = QPEL_EXTRA_BEFORE * srcstride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1446         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1447
1448         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1449                                  edge_emu_stride, srcstride,
1450                                  block_w + QPEL_EXTRA,
1451                                  block_h + QPEL_EXTRA,
1452                                  x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1453                                  pic_width, pic_height);
1454         src = lc->edge_emu_buffer + buf_offset;
1455         srcstride = edge_emu_stride;
1456     }
1457
1458     if (!weight_flag)
1459         s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1460                                                       block_h, mx, my, block_w);
1461     else
1462         s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1463                                                         block_h, s->sh.luma_log2_weight_denom,
1464                                                         luma_weight, luma_offset, mx, my, block_w);
1465 }
1466
1467 /**
1468  * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1469  *
1470  * @param s HEVC decoding context
1471  * @param dst target buffer for block data at block position
1472  * @param dststride stride of the dst buffer
1473  * @param ref0 reference picture0 buffer at origin (0, 0)
1474  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1475  * @param x_off horizontal position of block from origin (0, 0)
1476  * @param y_off vertical position of block from origin (0, 0)
1477  * @param block_w width of block
1478  * @param block_h height of block
1479  * @param ref1 reference picture1 buffer at origin (0, 0)
1480  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1481  * @param current_mv current motion vector structure
1482  */
1483  static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1484                        AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1485                        int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1486 {
1487     HEVCLocalContext *lc = s->HEVClc;
1488     ptrdiff_t src0stride  = ref0->linesize[0];
1489     ptrdiff_t src1stride  = ref1->linesize[0];
1490     int pic_width        = s->ps.sps->width;
1491     int pic_height       = s->ps.sps->height;
1492     int mx0              = mv0->x & 3;
1493     int my0              = mv0->y & 3;
1494     int mx1              = mv1->x & 3;
1495     int my1              = mv1->y & 3;
1496     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1497                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1498     int x_off0           = x_off + (mv0->x >> 2);
1499     int y_off0           = y_off + (mv0->y >> 2);
1500     int x_off1           = x_off + (mv1->x >> 2);
1501     int y_off1           = y_off + (mv1->y >> 2);
1502     int idx              = ff_hevc_pel_weight[block_w];
1503
1504     uint8_t *src0  = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1505     uint8_t *src1  = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1506
1507     if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1508         x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1509         y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1510         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1511         int offset     = QPEL_EXTRA_BEFORE * src0stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1512         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1513
1514         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1515                                  edge_emu_stride, src0stride,
1516                                  block_w + QPEL_EXTRA,
1517                                  block_h + QPEL_EXTRA,
1518                                  x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1519                                  pic_width, pic_height);
1520         src0 = lc->edge_emu_buffer + buf_offset;
1521         src0stride = edge_emu_stride;
1522     }
1523
1524     if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1525         x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1526         y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1527         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1528         int offset     = QPEL_EXTRA_BEFORE * src1stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1529         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1530
1531         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1532                                  edge_emu_stride, src1stride,
1533                                  block_w + QPEL_EXTRA,
1534                                  block_h + QPEL_EXTRA,
1535                                  x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1536                                  pic_width, pic_height);
1537         src1 = lc->edge_emu_buffer2 + buf_offset;
1538         src1stride = edge_emu_stride;
1539     }
1540
1541     s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
1542                                                 block_h, mx0, my0, block_w);
1543     if (!weight_flag)
1544         s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1545                                                        block_h, mx1, my1, block_w);
1546     else
1547         s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1548                                                          block_h, s->sh.luma_log2_weight_denom,
1549                                                          s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1550                                                          s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1551                                                          s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1552                                                          s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1553                                                          mx1, my1, block_w);
1554
1555 }
1556
1557 /**
1558  * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1559  *
1560  * @param s HEVC decoding context
1561  * @param dst1 target buffer for block data at block position (U plane)
1562  * @param dst2 target buffer for block data at block position (V plane)
1563  * @param dststride stride of the dst1 and dst2 buffers
1564  * @param ref reference picture buffer at origin (0, 0)
1565  * @param mv motion vector (relative to block position) to get pixel data from
1566  * @param x_off horizontal position of block from origin (0, 0)
1567  * @param y_off vertical position of block from origin (0, 0)
1568  * @param block_w width of block
1569  * @param block_h height of block
1570  * @param chroma_weight weighting factor applied to the chroma prediction
1571  * @param chroma_offset additive offset applied to the chroma prediction value
1572  */
1573
1574 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1575                           ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1576                           int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1577 {
1578     HEVCLocalContext *lc = s->HEVClc;
1579     int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
1580     int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
1581     const Mv *mv         = &current_mv->mv[reflist];
1582     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1583                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1584     int idx              = ff_hevc_pel_weight[block_w];
1585     int hshift           = s->ps.sps->hshift[1];
1586     int vshift           = s->ps.sps->vshift[1];
1587     intptr_t mx          = av_mod_uintp2(mv->x, 2 + hshift);
1588     intptr_t my          = av_mod_uintp2(mv->y, 2 + vshift);
1589     intptr_t _mx         = mx << (1 - hshift);
1590     intptr_t _my         = my << (1 - vshift);
1591
1592     x_off += mv->x >> (2 + hshift);
1593     y_off += mv->y >> (2 + vshift);
1594     src0  += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1595
1596     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1597         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1598         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1599         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1600         int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->ps.sps->pixel_shift));
1601         int buf_offset0 = EPEL_EXTRA_BEFORE *
1602                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1603         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1604                                  edge_emu_stride, srcstride,
1605                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1606                                  x_off - EPEL_EXTRA_BEFORE,
1607                                  y_off - EPEL_EXTRA_BEFORE,
1608                                  pic_width, pic_height);
1609
1610         src0 = lc->edge_emu_buffer + buf_offset0;
1611         srcstride = edge_emu_stride;
1612     }
1613     if (!weight_flag)
1614         s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1615                                                   block_h, _mx, _my, block_w);
1616     else
1617         s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1618                                                         block_h, s->sh.chroma_log2_weight_denom,
1619                                                         chroma_weight, chroma_offset, _mx, _my, block_w);
1620 }
1621
1622 /**
1623  * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1624  *
1625  * @param s HEVC decoding context
1626  * @param dst target buffer for block data at block position
1627  * @param dststride stride of the dst buffer
1628  * @param ref0 reference picture0 buffer at origin (0, 0)
1629  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1630  * @param x_off horizontal position of block from origin (0, 0)
1631  * @param y_off vertical position of block from origin (0, 0)
1632  * @param block_w width of block
1633  * @param block_h height of block
1634  * @param ref1 reference picture1 buffer at origin (0, 0)
1635  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1636  * @param current_mv current motion vector structure
1637  * @param cidx chroma component(cb, cr)
1638  */
1639 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1640                          int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1641 {
1642     HEVCLocalContext *lc = s->HEVClc;
1643     uint8_t *src1        = ref0->data[cidx+1];
1644     uint8_t *src2        = ref1->data[cidx+1];
1645     ptrdiff_t src1stride = ref0->linesize[cidx+1];
1646     ptrdiff_t src2stride = ref1->linesize[cidx+1];
1647     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1648                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1649     int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
1650     int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
1651     Mv *mv0              = &current_mv->mv[0];
1652     Mv *mv1              = &current_mv->mv[1];
1653     int hshift = s->ps.sps->hshift[1];
1654     int vshift = s->ps.sps->vshift[1];
1655
1656     intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift);
1657     intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift);
1658     intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift);
1659     intptr_t my1 = av_mod_uintp2(mv1->y, 2 + vshift);
1660     intptr_t _mx0 = mx0 << (1 - hshift);
1661     intptr_t _my0 = my0 << (1 - vshift);
1662     intptr_t _mx1 = mx1 << (1 - hshift);
1663     intptr_t _my1 = my1 << (1 - vshift);
1664
1665     int x_off0 = x_off + (mv0->x >> (2 + hshift));
1666     int y_off0 = y_off + (mv0->y >> (2 + vshift));
1667     int x_off1 = x_off + (mv1->x >> (2 + hshift));
1668     int y_off1 = y_off + (mv1->y >> (2 + vshift));
1669     int idx = ff_hevc_pel_weight[block_w];
1670     src1  += y_off0 * src1stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1671     src2  += y_off1 * src2stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1672
1673     if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1674         x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1675         y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1676         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1677         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1678         int buf_offset1 = EPEL_EXTRA_BEFORE *
1679                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1680
1681         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1682                                  edge_emu_stride, src1stride,
1683                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1684                                  x_off0 - EPEL_EXTRA_BEFORE,
1685                                  y_off0 - EPEL_EXTRA_BEFORE,
1686                                  pic_width, pic_height);
1687
1688         src1 = lc->edge_emu_buffer + buf_offset1;
1689         src1stride = edge_emu_stride;
1690     }
1691
1692     if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1693         x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1694         y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1695         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1696         int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1697         int buf_offset1 = EPEL_EXTRA_BEFORE *
1698                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1699
1700         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1701                                  edge_emu_stride, src2stride,
1702                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1703                                  x_off1 - EPEL_EXTRA_BEFORE,
1704                                  y_off1 - EPEL_EXTRA_BEFORE,
1705                                  pic_width, pic_height);
1706
1707         src2 = lc->edge_emu_buffer2 + buf_offset1;
1708         src2stride = edge_emu_stride;
1709     }
1710
1711     s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1712                                                 block_h, _mx0, _my0, block_w);
1713     if (!weight_flag)
1714         s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1715                                                        src2, src2stride, lc->tmp,
1716                                                        block_h, _mx1, _my1, block_w);
1717     else
1718         s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1719                                                          src2, src2stride, lc->tmp,
1720                                                          block_h,
1721                                                          s->sh.chroma_log2_weight_denom,
1722                                                          s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1723                                                          s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1724                                                          s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1725                                                          s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1726                                                          _mx1, _my1, block_w);
1727 }
1728
1729 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1730                                 const Mv *mv, int y0, int height)
1731 {
1732     if (s->threads_type == FF_THREAD_FRAME ) {
1733         int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1734
1735         ff_thread_await_progress(&ref->tf, y, 0);
1736     }
1737 }
1738
1739 static void hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
1740                                   int nPbH, int log2_cb_size, int part_idx,
1741                                   int merge_idx, MvField *mv)
1742 {
1743     HEVCLocalContext *lc = s->HEVClc;
1744     enum InterPredIdc inter_pred_idc = PRED_L0;
1745     int mvp_flag;
1746
1747     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1748     mv->pred_flag = 0;
1749     if (s->sh.slice_type == HEVC_SLICE_B)
1750         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1751
1752     if (inter_pred_idc != PRED_L1) {
1753         if (s->sh.nb_refs[L0])
1754             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1755
1756         mv->pred_flag = PF_L0;
1757         ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1758         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1759         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1760                                  part_idx, merge_idx, mv, mvp_flag, 0);
1761         mv->mv[0].x += lc->pu.mvd.x;
1762         mv->mv[0].y += lc->pu.mvd.y;
1763     }
1764
1765     if (inter_pred_idc != PRED_L0) {
1766         if (s->sh.nb_refs[L1])
1767             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1768
1769         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1770             AV_ZERO32(&lc->pu.mvd);
1771         } else {
1772             ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1773         }
1774
1775         mv->pred_flag += PF_L1;
1776         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1777         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1778                                  part_idx, merge_idx, mv, mvp_flag, 1);
1779         mv->mv[1].x += lc->pu.mvd.x;
1780         mv->mv[1].y += lc->pu.mvd.y;
1781     }
1782 }
1783
1784 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1785                                 int nPbW, int nPbH,
1786                                 int log2_cb_size, int partIdx, int idx)
1787 {
1788 #define POS(c_idx, x, y)                                                              \
1789     &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1790                            (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1791     HEVCLocalContext *lc = s->HEVClc;
1792     int merge_idx = 0;
1793     struct MvField current_mv = {{{ 0 }}};
1794
1795     int min_pu_width = s->ps.sps->min_pu_width;
1796
1797     MvField *tab_mvf = s->ref->tab_mvf;
1798     RefPicList  *refPicList = s->ref->refPicList;
1799     HEVCFrame *ref0 = NULL, *ref1 = NULL;
1800     uint8_t *dst0 = POS(0, x0, y0);
1801     uint8_t *dst1 = POS(1, x0, y0);
1802     uint8_t *dst2 = POS(2, x0, y0);
1803     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1804     int min_cb_width     = s->ps.sps->min_cb_width;
1805     int x_cb             = x0 >> log2_min_cb_size;
1806     int y_cb             = y0 >> log2_min_cb_size;
1807     int x_pu, y_pu;
1808     int i, j;
1809
1810     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1811
1812     if (!skip_flag)
1813         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1814
1815     if (skip_flag || lc->pu.merge_flag) {
1816         if (s->sh.max_num_merge_cand > 1)
1817             merge_idx = ff_hevc_merge_idx_decode(s);
1818         else
1819             merge_idx = 0;
1820
1821         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1822                                    partIdx, merge_idx, &current_mv);
1823     } else {
1824         hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1825                               partIdx, merge_idx, &current_mv);
1826     }
1827
1828     x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1829     y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1830
1831     for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1832         for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1833             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1834
1835     if (current_mv.pred_flag & PF_L0) {
1836         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1837         if (!ref0)
1838             return;
1839         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1840     }
1841     if (current_mv.pred_flag & PF_L1) {
1842         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1843         if (!ref1)
1844             return;
1845         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1846     }
1847
1848     if (current_mv.pred_flag == PF_L0) {
1849         int x0_c = x0 >> s->ps.sps->hshift[1];
1850         int y0_c = y0 >> s->ps.sps->vshift[1];
1851         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1852         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1853
1854         luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1855                     &current_mv.mv[0], x0, y0, nPbW, nPbH,
1856                     s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1857                     s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1858
1859         if (s->ps.sps->chroma_format_idc) {
1860             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1861                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1862                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1863             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1864                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1865                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1866         }
1867     } else if (current_mv.pred_flag == PF_L1) {
1868         int x0_c = x0 >> s->ps.sps->hshift[1];
1869         int y0_c = y0 >> s->ps.sps->vshift[1];
1870         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1871         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1872
1873         luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1874                     &current_mv.mv[1], x0, y0, nPbW, nPbH,
1875                     s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1876                     s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1877
1878         if (s->ps.sps->chroma_format_idc) {
1879             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1880                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1881                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1882
1883             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1884                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1885                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1886         }
1887     } else if (current_mv.pred_flag == PF_BI) {
1888         int x0_c = x0 >> s->ps.sps->hshift[1];
1889         int y0_c = y0 >> s->ps.sps->vshift[1];
1890         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1891         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1892
1893         luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1894                    &current_mv.mv[0], x0, y0, nPbW, nPbH,
1895                    ref1->frame, &current_mv.mv[1], &current_mv);
1896
1897         if (s->ps.sps->chroma_format_idc) {
1898             chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1899                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0);
1900
1901             chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1902                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 1);
1903         }
1904     }
1905 }
1906
1907 /**
1908  * 8.4.1
1909  */
1910 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1911                                 int prev_intra_luma_pred_flag)
1912 {
1913     HEVCLocalContext *lc = s->HEVClc;
1914     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
1915     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
1916     int min_pu_width     = s->ps.sps->min_pu_width;
1917     int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
1918     int x0b              = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
1919     int y0b              = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
1920
1921     int cand_up   = (lc->ctb_up_flag || y0b) ?
1922                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1923     int cand_left = (lc->ctb_left_flag || x0b) ?
1924                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1925
1926     int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1927
1928     MvField *tab_mvf = s->ref->tab_mvf;
1929     int intra_pred_mode;
1930     int candidate[3];
1931     int i, j;
1932
1933     // intra_pred_mode prediction does not cross vertical CTB boundaries
1934     if ((y0 - 1) < y_ctb)
1935         cand_up = INTRA_DC;
1936
1937     if (cand_left == cand_up) {
1938         if (cand_left < 2) {
1939             candidate[0] = INTRA_PLANAR;
1940             candidate[1] = INTRA_DC;
1941             candidate[2] = INTRA_ANGULAR_26;
1942         } else {
1943             candidate[0] = cand_left;
1944             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1945             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1946         }
1947     } else {
1948         candidate[0] = cand_left;
1949         candidate[1] = cand_up;
1950         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1951             candidate[2] = INTRA_PLANAR;
1952         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1953             candidate[2] = INTRA_DC;
1954         } else {
1955             candidate[2] = INTRA_ANGULAR_26;
1956         }
1957     }
1958
1959     if (prev_intra_luma_pred_flag) {
1960         intra_pred_mode = candidate[lc->pu.mpm_idx];
1961     } else {
1962         if (candidate[0] > candidate[1])
1963             FFSWAP(uint8_t, candidate[0], candidate[1]);
1964         if (candidate[0] > candidate[2])
1965             FFSWAP(uint8_t, candidate[0], candidate[2]);
1966         if (candidate[1] > candidate[2])
1967             FFSWAP(uint8_t, candidate[1], candidate[2]);
1968
1969         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1970         for (i = 0; i < 3; i++)
1971             if (intra_pred_mode >= candidate[i])
1972                 intra_pred_mode++;
1973     }
1974
1975     /* write the intra prediction units into the mv array */
1976     if (!size_in_pus)
1977         size_in_pus = 1;
1978     for (i = 0; i < size_in_pus; i++) {
1979         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1980                intra_pred_mode, size_in_pus);
1981
1982         for (j = 0; j < size_in_pus; j++) {
1983             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
1984         }
1985     }
1986
1987     return intra_pred_mode;
1988 }
1989
1990 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1991                                           int log2_cb_size, int ct_depth)
1992 {
1993     int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
1994     int x_cb   = x0 >> s->ps.sps->log2_min_cb_size;
1995     int y_cb   = y0 >> s->ps.sps->log2_min_cb_size;
1996     int y;
1997
1998     for (y = 0; y < length; y++)
1999         memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
2000                ct_depth, length);
2001 }
2002
/*
 * 35-entry remapping table indexed by an intra prediction mode (0..34).
 * Presumably used for the chroma mode with chroma_format_idc == 2 -- confirm
 * against intra_prediction_unit().
 */
static const uint8_t tab_mode_idx[] = {
    /*  0.. 6 */  0,  1,  2,  2,  2,  2,  3,
    /*  7..13 */  5,  7,  8, 10, 12, 13, 15,
    /* 14..20 */ 17, 18, 19, 20, 21, 22, 23,
    /* 21..27 */ 23, 24, 24, 25, 25, 26, 27,
    /* 28..34 */ 27, 28, 28, 29, 29, 30, 31,
};
2006
2007 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
2008                                   int log2_cb_size)
2009 {
2010     HEVCLocalContext *lc = s->HEVClc;
2011     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
2012     uint8_t prev_intra_luma_pred_flag[4];
2013     int split   = lc->cu.part_mode == PART_NxN;
2014     int pb_size = (1 << log2_cb_size) >> split;
2015     int side    = split + 1;
2016     int chroma_mode;
2017     int i, j;
2018
2019     for (i = 0; i < side; i++)
2020         for (j = 0; j < side; j++)
2021             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
2022
2023     for (i = 0; i < side; i++) {
2024         for (j = 0; j < side; j++) {
2025             if (prev_intra_luma_pred_flag[2 * i + j])
2026                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2027             else
2028                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2029
2030             lc->pu.intra_pred_mode[2 * i + j] =
2031                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2032                                      prev_intra_luma_pred_flag[2 * i + j]);
2033         }
2034     }
2035
2036     if (s->ps.sps->chroma_format_idc == 3) {
2037         for (i = 0; i < side; i++) {
2038             for (j = 0; j < side; j++) {
2039                 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2040                 if (chroma_mode != 4) {
2041                     if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
2042                         lc->pu.intra_pred_mode_c[2 * i + j] = 34;
2043                     else
2044                         lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
2045                 } else {
2046                     lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
2047                 }
2048             }
2049         }
2050     } else if (s->ps.sps->chroma_format_idc == 2) {
2051         int mode_idx;
2052         lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2053         if (chroma_mode != 4) {
2054             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2055                 mode_idx = 34;
2056             else
2057                 mode_idx = intra_chroma_table[chroma_mode];
2058         } else {
2059             mode_idx = lc->pu.intra_pred_mode[0];
2060         }
2061         lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
2062     } else if (s->ps.sps->chroma_format_idc != 0) {
2063         chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2064         if (chroma_mode != 4) {
2065             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2066                 lc->pu.intra_pred_mode_c[0] = 34;
2067             else
2068                 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
2069         } else {
2070             lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
2071         }
2072     }
2073 }
2074
2075 static void intra_prediction_unit_default_value(HEVCContext *s,
2076                                                 int x0, int y0,
2077                                                 int log2_cb_size)
2078 {
2079     HEVCLocalContext *lc = s->HEVClc;
2080     int pb_size          = 1 << log2_cb_size;
2081     int size_in_pus      = pb_size >> s->ps.sps->log2_min_pu_size;
2082     int min_pu_width     = s->ps.sps->min_pu_width;
2083     MvField *tab_mvf     = s->ref->tab_mvf;
2084     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
2085     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
2086     int j, k;
2087
2088     if (size_in_pus == 0)
2089         size_in_pus = 1;
2090     for (j = 0; j < size_in_pus; j++)
2091         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2092     if (lc->cu.pred_mode == MODE_INTRA)
2093         for (j = 0; j < size_in_pus; j++)
2094             for (k = 0; k < size_in_pus; k++)
2095                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
2096 }
2097
2098 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2099 {
2100     int cb_size          = 1 << log2_cb_size;
2101     HEVCLocalContext *lc = s->HEVClc;
2102     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
2103     int length           = cb_size >> log2_min_cb_size;
2104     int min_cb_width     = s->ps.sps->min_cb_width;
2105     int x_cb             = x0 >> log2_min_cb_size;
2106     int y_cb             = y0 >> log2_min_cb_size;
2107     int idx              = log2_cb_size - 2;
2108     int qp_block_mask    = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2109     int x, y, ret;
2110
2111     lc->cu.x                = x0;
2112     lc->cu.y                = y0;
2113     lc->cu.pred_mode        = MODE_INTRA;
2114     lc->cu.part_mode        = PART_2Nx2N;
2115     lc->cu.intra_split_flag = 0;
2116
2117     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2118     for (x = 0; x < 4; x++)
2119         lc->pu.intra_pred_mode[x] = 1;
2120     if (s->ps.pps->transquant_bypass_enable_flag) {
2121         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2122         if (lc->cu.cu_transquant_bypass_flag)
2123             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2124     } else
2125         lc->cu.cu_transquant_bypass_flag = 0;
2126
2127     if (s->sh.slice_type != HEVC_SLICE_I) {
2128         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2129
2130         x = y_cb * min_cb_width + x_cb;
2131         for (y = 0; y < length; y++) {
2132             memset(&s->skip_flag[x], skip_flag, length);
2133             x += min_cb_width;
2134         }
2135         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2136     } else {
2137         x = y_cb * min_cb_width + x_cb;
2138         for (y = 0; y < length; y++) {
2139             memset(&s->skip_flag[x], 0, length);
2140             x += min_cb_width;
2141         }
2142     }
2143
2144     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2145         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2146         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2147
2148         if (!s->sh.disable_deblocking_filter_flag)
2149             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2150     } else {
2151         int pcm_flag = 0;
2152
2153         if (s->sh.slice_type != HEVC_SLICE_I)
2154             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2155         if (lc->cu.pred_mode != MODE_INTRA ||
2156             log2_cb_size == s->ps.sps->log2_min_cb_size) {
2157             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2158             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2159                                       lc->cu.pred_mode == MODE_INTRA;
2160         }
2161
2162         if (lc->cu.pred_mode == MODE_INTRA) {
2163             if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2164                 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2165                 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2166                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2167             }
2168             if (pcm_flag) {
2169                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2170                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2171                 if (s->ps.sps->pcm.loop_filter_disable_flag)
2172                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2173
2174                 if (ret < 0)
2175                     return ret;
2176             } else {
2177                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2178             }
2179         } else {
2180             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2181             switch (lc->cu.part_mode) {
2182             case PART_2Nx2N:
2183                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2184                 break;
2185             case PART_2NxN:
2186                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0, idx);
2187                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2188                 break;
2189             case PART_Nx2N:
2190                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2191                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2192                 break;
2193             case PART_2NxnU:
2194                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0, idx);
2195                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2196                 break;
2197             case PART_2NxnD:
2198                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2199                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1, idx);
2200                 break;
2201             case PART_nLx2N:
2202                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0, idx - 2);
2203                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2204                 break;
2205             case PART_nRx2N:
2206                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2207                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1, idx - 2);
2208                 break;
2209             case PART_NxN:
2210                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2211                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2212                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2213                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
2214                 break;
2215             }
2216         }
2217
2218         if (!pcm_flag) {
2219             int rqt_root_cbf = 1;
2220
2221             if (lc->cu.pred_mode != MODE_INTRA &&
2222                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2223                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2224             }
2225             if (rqt_root_cbf) {
2226                 const static int cbf[2] = { 0 };
2227                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2228                                          s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2229                                          s->ps.sps->max_transform_hierarchy_depth_inter;
2230                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2231                                          log2_cb_size,
2232                                          log2_cb_size, 0, 0, cbf, cbf);
2233                 if (ret < 0)
2234                     return ret;
2235             } else {
2236                 if (!s->sh.disable_deblocking_filter_flag)
2237                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2238             }
2239         }
2240     }
2241
2242     if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2243         ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
2244
2245     x = y_cb * min_cb_width + x_cb;
2246     for (y = 0; y < length; y++) {
2247         memset(&s->qp_y_tab[x], lc->qp_y, length);
2248         x += min_cb_width;
2249     }
2250
2251     if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2252        ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2253         lc->qPy_pred = lc->qp_y;
2254     }
2255
2256     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
2257
2258     return 0;
2259 }
2260
2261 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2262                                int log2_cb_size, int cb_depth)
2263 {
2264     HEVCLocalContext *lc = s->HEVClc;
2265     const int cb_size    = 1 << log2_cb_size;
2266     int ret;
2267     int split_cu;
2268
2269     lc->ct_depth = cb_depth;
2270     if (x0 + cb_size <= s->ps.sps->width  &&
2271         y0 + cb_size <= s->ps.sps->height &&
2272         log2_cb_size > s->ps.sps->log2_min_cb_size) {
2273         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2274     } else {
2275         split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
2276     }
2277     if (s->ps.pps->cu_qp_delta_enabled_flag &&
2278         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2279         lc->tu.is_cu_qp_delta_coded = 0;
2280         lc->tu.cu_qp_delta          = 0;
2281     }
2282
2283     if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2284         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_chroma_qp_offset_depth) {
2285         lc->tu.is_cu_chroma_qp_offset_coded = 0;
2286     }
2287
2288     if (split_cu) {
2289         int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2290         const int cb_size_split = cb_size >> 1;
2291         const int x1 = x0 + cb_size_split;
2292         const int y1 = y0 + cb_size_split;
2293
2294         int more_data = 0;
2295
2296         more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2297         if (more_data < 0)
2298             return more_data;
2299
2300         if (more_data && x1 < s->ps.sps->width) {
2301             more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2302             if (more_data < 0)
2303                 return more_data;
2304         }
2305         if (more_data && y1 < s->ps.sps->height) {
2306             more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2307             if (more_data < 0)
2308                 return more_data;
2309         }
2310         if (more_data && x1 < s->ps.sps->width &&
2311             y1 < s->ps.sps->height) {
2312             more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
2313             if (more_data < 0)
2314                 return more_data;
2315         }
2316
2317         if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2318             ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2319             lc->qPy_pred = lc->qp_y;
2320
2321         if (more_data)
2322             return ((x1 + cb_size_split) < s->ps.sps->width ||
2323                     (y1 + cb_size_split) < s->ps.sps->height);
2324         else
2325             return 0;
2326     } else {
2327         ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2328         if (ret < 0)
2329             return ret;
2330         if ((!((x0 + cb_size) %
2331                (1 << (s->ps.sps->log2_ctb_size))) ||
2332              (x0 + cb_size >= s->ps.sps->width)) &&
2333             (!((y0 + cb_size) %
2334                (1 << (s->ps.sps->log2_ctb_size))) ||
2335              (y0 + cb_size >= s->ps.sps->height))) {
2336             int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2337             return !end_of_slice_flag;
2338         } else {
2339             return 1;
2340         }
2341     }
2342
2343     return 0;
2344 }
2345
2346 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2347                                  int ctb_addr_ts)
2348 {
2349     HEVCLocalContext *lc  = s->HEVClc;
2350     int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
2351     int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2352     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2353
2354     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2355
2356     if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2357         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2358             lc->first_qp_group = 1;
2359         lc->end_of_tiles_x = s->ps.sps->width;
2360     } else if (s->ps.pps->tiles_enabled_flag) {
2361         if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2362             int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2363             lc->end_of_tiles_x   = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2364             lc->first_qp_group   = 1;
2365         }
2366     } else {
2367         lc->end_of_tiles_x = s->ps.sps->width;
2368     }
2369
2370     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
2371
2372     lc->boundary_flags = 0;
2373     if (s->ps.pps->tiles_enabled_flag) {
2374         if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2375             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2376         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2377             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2378         if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2379             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2380         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2381             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2382     } else {
2383         if (ctb_addr_in_slice <= 0)
2384             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2385         if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2386             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2387     }
2388
2389     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2390     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2391     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2392     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
2393 }
2394
2395 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2396 {
2397     HEVCContext *s  = avctxt->priv_data;
2398     int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
2399     int more_data   = 1;
2400     int x_ctb       = 0;
2401     int y_ctb       = 0;
2402     int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2403     int ret;
2404
2405     if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2406         av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2407         return AVERROR_INVALIDDATA;
2408     }
2409
2410     if (s->sh.dependent_slice_segment_flag) {
2411         int prev_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2412         if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2413             av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2414             return AVERROR_INVALIDDATA;
2415         }
2416     }
2417
2418     while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2419         int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2420
2421         x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2422         y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2423         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2424
2425         ret = ff_hevc_cabac_init(s, ctb_addr_ts);
2426         if (ret < 0) {
2427             s->tab_slice_address[ctb_addr_rs] = -1;
2428             return ret;
2429         }
2430
2431         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2432
2433         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2434         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2435         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2436
2437         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2438         if (more_data < 0) {
2439             s->tab_slice_address[ctb_addr_rs] = -1;
2440             return more_data;
2441         }
2442
2443
2444         ctb_addr_ts++;
2445         ff_hevc_save_states(s, ctb_addr_ts);
2446         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2447     }
2448
2449     if (x_ctb + ctb_size >= s->ps.sps->width &&
2450         y_ctb + ctb_size >= s->ps.sps->height)
2451         ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2452
2453     return ctb_addr_ts;
2454 }
2455
2456 static int hls_slice_data(HEVCContext *s)
2457 {
2458     int arg[2];
2459     int ret[2];
2460
2461     arg[0] = 0;
2462     arg[1] = 1;
2463
2464     s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
2465     return ret[0];
2466 }
/**
 * Slice-thread job decoding the CTBs of one CTB row (used by
 * hls_slice_data_wpp() through execute2()).
 *
 * @param avctxt        codec context whose priv_data is the main HEVCContext
 * @param input_ctb_row array of int; entry [job] is the CTB row, relative to
 *                      the start of the slice, handled by this job
 * @param job           index of this job
 * @param self_id       index into sList selecting the context this job
 *                      works on
 * @return the ts address following the last CTB when the bottom-right CTB of
 *         the picture was decoded, 0 when the row ended or decoding was
 *         abandoned because wpp_err is set, or a negative error code
 */
static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
{
    HEVCContext *s1  = avctxt->priv_data, *s;
    HEVCLocalContext *lc;
    int ctb_size    = 1<< s1->ps.sps->log2_ctb_size;
    int more_data   = 1;
    int *ctb_row_p    = input_ctb_row;
    int ctb_row = ctb_row_p[job];
    /* raster address of the first CTB of this row */
    int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->ps.sps->width + ctb_size - 1) >> s1->ps.sps->log2_ctb_size);
    int ctb_addr_ts = s1->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
    /* progress slot used with ff_thread_await/report_progress2() */
    int thread = ctb_row % s1->threads_number;
    int ret;

    s = s1->sList[self_id];
    lc = s->HEVClc;

    /* every row but the first starts on its own substream, described by
     * sh.offset/sh.size[ctb_row - 1] */
    if(ctb_row) {
        ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
        if (ret < 0)
            goto error;
        ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
    }

    while(more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
        int x_ctb = (ctb_addr_rs % s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
        int y_ctb = (ctb_addr_rs / s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;

        hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);

        /* NOTE(review): presumably blocks until the job this row depends on
         * is SHIFT_CTB_WPP CTBs ahead; confirm against
         * ff_thread_await_progress2() */
        ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);

        /* another job failed: report progress so waiters can continue, then
         * give up */
        if (atomic_load(&s1->wpp_err)) {
            ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
            return 0;
        }

        ret = ff_hevc_cabac_init(s, ctb_addr_ts);
        if (ret < 0)
            goto error;
        hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
        more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);

        if (more_data < 0) {
            ret = more_data;
            goto error;
        }

        ctb_addr_ts++;

        ff_hevc_save_states(s, ctb_addr_ts);
        /* one more CTB of this row is done */
        ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
        ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);

        /* the data ended before the end of the row although this is not the
         * last row of the slice: flag the error for all jobs */
        if (!more_data && (x_ctb+ctb_size) < s->ps.sps->width && ctb_row != s->sh.num_entry_point_offsets) {
            atomic_store(&s1->wpp_err, 1);
            ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
            return 0;
        }

        /* bottom-right CTB of the picture: run the final filter pass */
        if ((x_ctb+ctb_size) >= s->ps.sps->width && (y_ctb+ctb_size) >= s->ps.sps->height ) {
            ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
            ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
            return ctb_addr_ts;
        }
        ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
        x_ctb+=ctb_size;

        /* end of this CTB row */
        if(x_ctb >= s->ps.sps->width) {
            break;
        }
    }
    ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);

    return 0;
error:
    /* mark the CTB as not belonging to any slice, flag the error for the
     * other jobs and report progress so that none of them keeps waiting */
    s->tab_slice_address[ctb_addr_rs] = -1;
    atomic_store(&s1->wpp_err, 1);
    ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
    return ret;
}
2547
2548 static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal)
2549 {
2550     const uint8_t *data = nal->data;
2551     int length          = nal->size;
2552     HEVCLocalContext *lc = s->HEVClc;
2553     int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2554     int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2555     int64_t offset;
2556     int64_t startheader, cmpt = 0;
2557     int i, j, res = 0;
2558
2559     if (!ret || !arg) {
2560         av_free(ret);
2561         av_free(arg);
2562         return AVERROR(ENOMEM);
2563     }
2564
2565     if (s->sh.slice_ctb_addr_rs + s->sh.num_entry_point_offsets * s->ps.sps->ctb_width >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
2566         av_log(s->avctx, AV_LOG_ERROR, "WPP ctb addresses are wrong (%d %d %d %d)\n",
2567             s->sh.slice_ctb_addr_rs, s->sh.num_entry_point_offsets,
2568             s->ps.sps->ctb_width, s->ps.sps->ctb_height
2569         );
2570         res = AVERROR_INVALIDDATA;
2571         goto error;
2572     }
2573
2574     ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
2575
2576     if (!s->sList[1]) {
2577         for (i = 1; i < s->threads_number; i++) {
2578             s->sList[i] = av_malloc(sizeof(HEVCContext));
2579             memcpy(s->sList[i], s, sizeof(HEVCContext));
2580             s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2581             s->sList[i]->HEVClc = s->HEVClcList[i];
2582         }
2583     }
2584
2585     offset = (lc->gb.index >> 3);
2586
2587     for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) {
2588         if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2589             startheader--;
2590             cmpt++;
2591         }
2592     }
2593
2594     for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2595         offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2596         for (j = 0, cmpt = 0, startheader = offset
2597              + s->sh.entry_point_offset[i]; j < nal->skipped_bytes; j++) {
2598             if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2599                 startheader--;
2600                 cmpt++;
2601             }
2602         }
2603         s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2604         s->sh.offset[i - 1] = offset;
2605
2606     }
2607     if (s->sh.num_entry_point_offsets != 0) {
2608         offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2609         if (length < offset) {
2610             av_log(s->avctx, AV_LOG_ERROR, "entry_point_offset table is corrupted\n");
2611             res = AVERROR_INVALIDDATA;
2612             goto error;
2613         }
2614         s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2615         s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
2616
2617     }
2618     s->data = data;
2619
2620     for (i = 1; i < s->threads_number; i++) {
2621         s->sList[i]->HEVClc->first_qp_group = 1;
2622         s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2623         memcpy(s->sList[i], s, sizeof(HEVCContext));
2624         s->sList[i]->HEVClc = s->HEVClcList[i];
2625     }
2626
2627     atomic_store(&s->wpp_err, 0);
2628     ff_reset_entries(s->avctx);
2629
2630     for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2631         arg[i] = i;
2632         ret[i] = 0;
2633     }
2634
2635     if (s->ps.pps->entropy_coding_sync_enabled_flag)
2636         s->avctx->execute2(s->avctx, hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2637
2638     for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
2639         res += ret[i];
2640 error:
2641     av_free(ret);
2642     av_free(arg);
2643     return res;
2644 }
2645
2646 static int set_side_data(HEVCContext *s)
2647 {
2648     AVFrame *out = s->ref->frame;
2649
2650     if (s->sei.frame_packing.present &&
2651         s->sei.frame_packing.arrangement_type >= 3 &&
2652         s->sei.frame_packing.arrangement_type <= 5 &&
2653         s->sei.frame_packing.content_interpretation_type > 0 &&
2654         s->sei.frame_packing.content_interpretation_type < 3) {
2655         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2656         if (!stereo)
2657             return AVERROR(ENOMEM);
2658
2659         switch (s->sei.frame_packing.arrangement_type) {
2660         case 3:
2661             if (s->sei.frame_packing.quincunx_subsampling)
2662                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2663             else
2664                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2665             break;
2666         case 4:
2667             stereo->type = AV_STEREO3D_TOPBOTTOM;
2668             break;
2669         case 5:
2670             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2671             break;
2672         }
2673
2674         if (s->sei.frame_packing.content_interpretation_type == 2)
2675             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2676
2677         if (s->sei.frame_packing.arrangement_type == 5) {
2678             if (s->sei.frame_packing.current_frame_is_frame0_flag)
2679                 stereo->view = AV_STEREO3D_VIEW_LEFT;
2680             else
2681                 stereo->view = AV_STEREO3D_VIEW_RIGHT;
2682         }
2683     }
2684
2685     if (s->sei.display_orientation.present &&
2686         (s->sei.display_orientation.anticlockwise_rotation ||
2687          s->sei.display_orientation.hflip || s->sei.display_orientation.vflip)) {
2688         double angle = s->sei.display_orientation.anticlockwise_rotation * 360 / (double) (1 << 16);
2689         AVFrameSideData *rotation = av_frame_new_side_data(out,
2690                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2691                                                            sizeof(int32_t) * 9);
2692         if (!rotation)
2693             return AVERROR(ENOMEM);
2694
2695         av_display_rotation_set((int32_t *)rotation->data, angle);
2696         av_display_matrix_flip((int32_t *)rotation->data,
2697                                s->sei.display_orientation.hflip,
2698                                s->sei.display_orientation.vflip);
2699     }
2700
2701     // Decrement the mastering display flag when IRAP frame has no_rasl_output_flag=1
2702     // so the side data persists for the entire coded video sequence.
2703     if (s->sei.mastering_display.present > 0 &&
2704         IS_IRAP(s) && s->no_rasl_output_flag) {
2705         s->sei.mastering_display.present--;
2706     }
2707     if (s->sei.mastering_display.present) {
2708         // HEVC uses a g,b,r ordering, which we convert to a more natural r,g,b
2709         const int mapping[3] = {2, 0, 1};
2710         const int chroma_den = 50000;
2711         const int luma_den = 10000;
2712         int i;
2713         AVMasteringDisplayMetadata *metadata =
2714             av_mastering_display_metadata_create_side_data(out);
2715         if (!metadata)
2716             return AVERROR(ENOMEM);
2717
2718         for (i = 0; i < 3; i++) {
2719             const int j = mapping[i];
2720             metadata->display_primaries[i][0].num = s->sei.mastering_display.display_primaries[j][0];
2721             metadata->display_primaries[i][0].den = chroma_den;
2722             metadata->display_primaries[i][1].num = s->sei.mastering_display.display_primaries[j][1];
2723             metadata->display_primaries[i][1].den = chroma_den;
2724         }
2725         metadata->white_point[0].num = s->sei.mastering_display.white_point[0];
2726         metadata->white_point[0].den = chroma_den;
2727         metadata->white_point[1].num = s->sei.mastering_display.white_point[1];
2728         metadata->white_point[1].den = chroma_den;
2729
2730         metadata->max_luminance.num = s->sei.mastering_display.max_luminance;
2731         metadata->max_luminance.den = luma_den;
2732         metadata->min_luminance.num = s->sei.mastering_display.min_luminance;
2733         metadata->min_luminance.den = luma_den;
2734         metadata->has_luminance = 1;
2735         metadata->has_primaries = 1;
2736
2737         av_log(s->avctx, AV_LOG_DEBUG, "Mastering Display Metadata:\n");
2738         av_log(s->avctx, AV_LOG_DEBUG,
2739                "r(%5.4f,%5.4f) g(%5.4f,%5.4f) b(%5.4f %5.4f) wp(%5.4f, %5.4f)\n",
2740                av_q2d(metadata->display_primaries[0][0]),
2741                av_q2d(metadata->display_primaries[0][1]),
2742                av_q2d(metadata->display_primaries[1][0]),
2743                av_q2d(metadata->display_primaries[1][1]),
2744                av_q2d(metadata->display_primaries[2][0]),
2745                av_q2d(metadata->display_primaries[2][1]),
2746                av_q2d(metadata->white_point[0]), av_q2d(metadata->white_point[1]));
2747         av_log(s->avctx, AV_LOG_DEBUG,
2748                "min_luminance=%f, max_luminance=%f\n",
2749                av_q2d(metadata->min_luminance), av_q2d(metadata->max_luminance));
2750     }
2751     // Decrement the content light level flag when IRAP frame has no_rasl_output_flag=1
2752     // so the side data persists for the entire coded video sequence.
2753     if (s->sei.content_light.present > 0 &&
2754         IS_IRAP(s) && s->no_rasl_output_flag) {
2755         s->sei.content_light.present--;
2756     }
2757     if (s->sei.content_light.present) {
2758         AVContentLightMetadata *metadata =
2759             av_content_light_metadata_create_side_data(out);
2760         if (!metadata)
2761             return AVERROR(ENOMEM);
2762         metadata->MaxCLL  = s->sei.content_light.max_content_light_level;
2763         metadata->MaxFALL = s->sei.content_light.max_pic_average_light_level;
2764
2765         av_log(s->avctx, AV_LOG_DEBUG, "Content Light Level Metadata:\n");
2766         av_log(s->avctx, AV_LOG_DEBUG, "MaxCLL=%d, MaxFALL=%d\n",
2767                metadata->MaxCLL, metadata->MaxFALL);
2768     }
2769
2770     if (s->sei.a53_caption.a53_caption) {
2771         AVFrameSideData* sd = av_frame_new_side_data(out,
2772                                                      AV_FRAME_DATA_A53_CC,
2773                                                      s->sei.a53_caption.a53_caption_size);
2774         if (sd)
2775             memcpy(sd->data, s->sei.a53_caption.a53_caption, s->sei.a53_caption.a53_caption_size);
2776         av_freep(&s->sei.a53_caption.a53_caption);
2777         s->sei.a53_caption.a53_caption_size = 0;
2778         s->avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
2779     }
2780
2781     if (s->sei.alternative_transfer.present &&
2782         av_color_transfer_name(s->sei.alternative_transfer.preferred_transfer_characteristics) &&
2783         s->sei.alternative_transfer.preferred_transfer_characteristics != AVCOL_TRC_UNSPECIFIED) {
2784         s->avctx->color_trc = out->color_trc = s->sei.alternative_transfer.preferred_transfer_characteristics;
2785     }
2786
2787     return 0;
2788 }
2789
2790 static int hevc_frame_start(HEVCContext *s)
2791 {
2792     HEVCLocalContext *lc = s->HEVClc;
2793     int pic_size_in_ctb  = ((s->ps.sps->width  >> s->ps.sps->log2_min_cb_size) + 1) *
2794                            ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
2795     int ret;
2796
2797     memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2798     memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
2799     memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2800     memset(s->is_pcm,        0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
2801     memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2802
2803     s->is_decoded        = 0;
2804     s->first_nal_type    = s->nal_unit_type;
2805
2806     s->no_rasl_output_flag = IS_IDR(s) || IS_BLA(s) || (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos);
2807
2808     if (s->ps.pps->tiles_enabled_flag)
2809         lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2810
2811     ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2812     if (ret < 0)
2813         goto fail;
2814
2815     ret = ff_hevc_frame_rps(s);
2816     if (ret < 0) {
2817         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2818         goto fail;
2819     }
2820
2821     s->ref->frame->key_frame = IS_IRAP(s);
2822
2823     ret = set_side_data(s);
2824     if (ret < 0)
2825         goto fail;
2826
2827     s->frame->pict_type = 3 - s->sh.slice_type;
2828
2829     if (!IS_IRAP(s))
2830         ff_hevc_bump_frame(s);
2831
2832     av_frame_unref(s->output_frame);
2833     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2834     if (ret < 0)
2835         goto fail;
2836
2837     if (!s->avctx->hwaccel)
2838         ff_thread_finish_setup(s->avctx);
2839
2840     return 0;
2841
2842 fail:
2843     if (s->ref)
2844         ff_hevc_unref_frame(s, s->ref, ~0);
2845     s->ref = NULL;
2846     return ret;
2847 }
2848
2849 static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
2850 {
2851     HEVCLocalContext *lc = s->HEVClc;
2852     GetBitContext *gb    = &lc->gb;
2853     int ctb_addr_ts, ret;
2854
2855     *gb              = nal->gb;
2856     s->nal_unit_type = nal->type;
2857     s->temporal_id   = nal->temporal_id;
2858
2859     switch (s->nal_unit_type) {
2860     case HEVC_NAL_VPS:
2861         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2862             ret = s->avctx->hwaccel->decode_params(s->avctx,
2863                                                    nal->type,
2864                                                    nal->raw_data,
2865                                                    nal->raw_size);
2866             if (ret < 0)
2867                 goto fail;
2868         }
2869         ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2870         if (ret < 0)
2871             goto fail;
2872         break;
2873     case HEVC_NAL_SPS:
2874         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2875             ret = s->avctx->hwaccel->decode_params(s->avctx,
2876                                                    nal->type,
2877                                                    nal->raw_data,
2878                                                    nal->raw_size);
2879             if (ret < 0)
2880                 goto fail;
2881         }
2882         ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2883                                      s->apply_defdispwin);
2884         if (ret < 0)
2885             goto fail;
2886         break;
2887     case HEVC_NAL_PPS:
2888         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2889             ret = s->avctx->hwaccel->decode_params(s->avctx,
2890                                                    nal->type,
2891                                                    nal->raw_data,
2892                                                    nal->raw_size);
2893             if (ret < 0)
2894                 goto fail;
2895         }
2896         ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2897         if (ret < 0)
2898             goto fail;
2899         break;
2900     case HEVC_NAL_SEI_PREFIX:
2901     case HEVC_NAL_SEI_SUFFIX:
2902         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2903             ret = s->avctx->hwaccel->decode_params(s->avctx,
2904                                                    nal->type,
2905                                                    nal->raw_data,
2906                                                    nal->raw_size);
2907             if (ret < 0)
2908                 goto fail;
2909         }
2910         ret = ff_hevc_decode_nal_sei(gb, s->avctx, &s->sei, &s->ps, s->nal_unit_type);
2911         if (ret < 0)
2912             goto fail;
2913         break;
2914     case HEVC_NAL_TRAIL_R:
2915     case HEVC_NAL_TRAIL_N:
2916     case HEVC_NAL_TSA_N:
2917     case HEVC_NAL_TSA_R:
2918     case HEVC_NAL_STSA_N:
2919     case HEVC_NAL_STSA_R:
2920     case HEVC_NAL_BLA_W_LP:
2921     case HEVC_NAL_BLA_W_RADL:
2922     case HEVC_NAL_BLA_N_LP:
2923     case HEVC_NAL_IDR_W_RADL:
2924     case HEVC_NAL_IDR_N_LP:
2925     case HEVC_NAL_CRA_NUT:
2926     case HEVC_NAL_RADL_N:
2927     case HEVC_NAL_RADL_R:
2928     case HEVC_NAL_RASL_N:
2929     case HEVC_NAL_RASL_R:
2930         ret = hls_slice_header(s);
2931         if (ret < 0)
2932             return ret;
2933         if (ret == 1) {
2934             ret = AVERROR_INVALIDDATA;
2935             goto fail;
2936         }
2937
2938
2939         if (
2940             (s->avctx->skip_frame >= AVDISCARD_BIDIR && s->sh.slice_type == HEVC_SLICE_B) ||
2941             (s->avctx->skip_frame >= AVDISCARD_NONINTRA && s->sh.slice_type != HEVC_SLICE_I) ||
2942             (s->avctx->skip_frame >= AVDISCARD_NONKEY && !IS_IRAP(s))) {
2943             break;
2944         }
2945
2946         if (s->sh.first_slice_in_pic_flag) {
2947             if (s->max_ra == INT_MAX) {
2948                 if (s->nal_unit_type == HEVC_NAL_CRA_NUT || IS_BLA(s)) {
2949                     s->max_ra = s->poc;
2950                 } else {
2951                     if (IS_IDR(s))
2952                         s->max_ra = INT_MIN;
2953                 }
2954             }
2955
2956             if ((s->nal_unit_type == HEVC_NAL_RASL_R || s->nal_unit_type == HEVC_NAL_RASL_N) &&
2957                 s->poc <= s->max_ra) {
2958                 s->is_decoded = 0;
2959                 break;
2960             } else {
2961                 if (s->nal_unit_type == HEVC_NAL_RASL_R && s->poc > s->max_ra)
2962                     s->max_ra = INT_MIN;
2963             }
2964
2965             s->overlap ++;
2966             ret = hevc_frame_start(s);
2967             if (ret < 0)
2968                 return ret;
2969         } else if (!s->ref) {
2970             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2971             goto fail;
2972         }
2973
2974         if (s->nal_unit_type != s->first_nal_type) {
2975             av_log(s->avctx, AV_LOG_ERROR,
2976                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2977                    s->first_nal_type, s->nal_unit_type);
2978             return AVERROR_INVALIDDATA;
2979         }
2980
2981         if (!s->sh.dependent_slice_segment_flag &&
2982             s->sh.slice_type != HEVC_SLICE_I) {
2983             ret = ff_hevc_slice_rpl(s);
2984             if (ret < 0) {
2985                 av_log(s->avctx, AV_LOG_WARNING,
2986                        "Error constructing the reference lists for the current slice.\n");
2987                 goto fail;
2988             }
2989         }
2990
2991         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2992             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2993             if (ret < 0)
2994                 goto fail;
2995         }
2996
2997         if (s->avctx->hwaccel) {
2998             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
2999             if (ret < 0)
3000                 goto fail;
3001         } else {
3002             if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
3003                 ctb_addr_ts = hls_slice_data_wpp(s, nal);
3004             else
3005                 ctb_addr_ts = hls_slice_data(s);
3006             if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
3007                 s->is_decoded = 1;
3008             }
3009
3010             if (ctb_addr_ts < 0) {
3011                 ret = ctb_addr_ts;
3012                 goto fail;
3013             }
3014         }
3015         break;
3016     case HEVC_NAL_EOS_NUT:
3017     case HEVC_NAL_EOB_NUT:
3018         s->seq_decode = (s->seq_decode + 1) & 0xff;
3019         s->max_ra     = INT_MAX;
3020         break;
3021     case HEVC_NAL_AUD:
3022     case HEVC_NAL_FD_NUT:
3023         break;
3024     default:
3025         av_log(s->avctx, AV_LOG_INFO,
3026                "Skipping NAL unit %d\n", s->nal_unit_type);
3027     }
3028
3029     return 0;
3030 fail:
3031     if (s->avctx->err_recognition & AV_EF_EXPLODE)
3032         return ret;
3033     return 0;
3034 }
3035
3036 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
3037 {
3038     int i, ret = 0;
3039     int eos_at_start = 1;
3040
3041     s->ref = NULL;
3042     s->last_eos = s->eos;
3043     s->eos = 0;
3044     s->overlap = 0;
3045
3046     /* split the input packet into NAL units, so we know the upper bound on the
3047      * number of slices in the frame */
3048     ret = ff_h2645_packet_split(&s->pkt, buf, length, s->avctx, s->is_nalff,
3049                                 s->nal_length_size, s->avctx->codec_id, 1, 0);
3050     if (ret < 0) {
3051         av_log(s->avctx, AV_LOG_ERROR,
3052                "Error splitting the input into NAL units.\n");
3053         return ret;
3054     }
3055
3056     for (i = 0; i < s->pkt.nb_nals; i++) {
3057         if (s->pkt.nals[i].type == HEVC_NAL_EOB_NUT ||
3058             s->pkt.nals[i].type == HEVC_NAL_EOS_NUT) {
3059             if (eos_at_start) {
3060                 s->last_eos = 1;
3061             } else {
3062                 s->eos = 1;
3063             }
3064         } else {
3065             eos_at_start = 0;
3066         }
3067     }
3068
3069     /* decode the NAL units */
3070     for (i = 0; i < s->pkt.nb_nals; i++) {
3071         H2645NAL *nal = &s->pkt.nals[i];
3072
3073         if (s->avctx->skip_frame >= AVDISCARD_ALL ||
3074             (s->avctx->skip_frame >= AVDISCARD_NONREF
3075             && ff_hevc_nal_is_nonref(nal->type)))
3076             continue;
3077
3078         ret = decode_nal_unit(s, nal);
3079         if (ret >= 0 && s->overlap > 2)
3080             ret = AVERROR_INVALIDDATA;
3081         if (ret < 0) {
3082             av_log(s->avctx, AV_LOG_WARNING,
3083                    "Error parsing NAL unit #%d.\n", i);
3084             goto fail;
3085         }
3086     }
3087
3088 fail:
3089     if (s->ref && s->threads_type == FF_THREAD_FRAME)
3090         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
3091
3092     return ret;
3093 }
3094
/**
 * Log a 16-byte MD5 digest as lowercase hexadecimal, two digits per byte.
 *
 * @param log_ctx context passed on to av_log()
 * @param level   log level passed on to av_log()
 * @param md5     digest to print
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *byte = md5;
    const uint8_t *end  = md5 + 16;

    while (byte < end)
        av_log(log_ctx, level, "%02"PRIx8, *byte++);
}
3101
3102 static int verify_md5(HEVCContext *s, AVFrame *frame)
3103 {
3104     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
3105     int pixel_shift;
3106     int i, j;
3107
3108     if (!desc)
3109         return AVERROR(EINVAL);
3110
3111     pixel_shift = desc->comp[0].depth > 8;
3112
3113     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
3114            s->poc);
3115
3116     /* the checksums are LE, so we have to byteswap for >8bpp formats
3117      * on BE arches */
3118 #if HAVE_BIGENDIAN
3119     if (pixel_shift && !s->checksum_buf) {
3120         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
3121                        FFMAX3(frame->linesize[0], frame->linesize[1],
3122                               frame->linesize[2]));
3123         if (!s->checksum_buf)
3124             return AVERROR(ENOMEM);
3125     }
3126 #endif
3127
3128     for (i = 0; frame->data[i]; i++) {
3129         int width  = s->avctx->coded_width;
3130         int height = s->avctx->coded_height;
3131         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
3132         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
3133         uint8_t md5[16];
3134
3135         av_md5_init(s->md5_ctx);
3136         for (j = 0; j < h; j++) {
3137             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
3138 #if HAVE_BIGENDIAN
3139             if (pixel_shift) {
3140                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
3141                                     (const uint16_t *) src, w);
3142                 src = s->checksum_buf;
3143             }
3144 #endif
3145             av_md5_update(s->md5_ctx, src, w << pixel_shift);
3146         }
3147         av_md5_final(s->md5_ctx, md5);
3148
3149         if (!memcmp(md5, s->sei.picture_hash.md5[i], 16)) {
3150             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
3151             print_md5(s->avctx, AV_LOG_DEBUG, md5);
3152             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
3153         } else {
3154             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
3155             print_md5(s->avctx, AV_LOG_ERROR, md5);
3156             av_log   (s->avctx, AV_LOG_ERROR, " != ");
3157             print_md5(s->avctx, AV_LOG_ERROR, s->sei.picture_hash.md5[i]);
3158             av_log   (s->avctx, AV_LOG_ERROR, "\n");
3159             return AVERROR_INVALIDDATA;
3160         }
3161     }
3162
3163     av_log(s->avctx, AV_LOG_DEBUG, "\n");
3164
3165     return 0;
3166 }
3167
3168 static int hevc_decode_extradata(HEVCContext *s, uint8_t *buf, int length, int first)
3169 {
3170     int ret, i;
3171
3172     ret = ff_hevc_decode_extradata(buf, length, &s->ps, &s->sei, &s->is_nalff,
3173                                    &s->nal_length_size, s->avctx->err_recognition,
3174                                    s->apply_defdispwin, s->avctx);
3175     if (ret < 0)
3176         return ret;
3177
3178     /* export stream parameters from the first SPS */
3179     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3180         if (first && s->ps.sps_list[i]) {
3181             const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
3182             export_stream_params(s->avctx, &s->ps, sps);
3183             break;
3184         }
3185     }
3186
3187     return 0;
3188 }
3189
3190 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
3191                              AVPacket *avpkt)
3192 {
3193     int ret;
3194     int new_extradata_size;
3195     uint8_t *new_extradata;
3196     HEVCContext *s = avctx->priv_data;
3197
3198     if (!avpkt->size) {
3199         ret = ff_hevc_output_frame(s, data, 1);
3200         if (ret < 0)
3201             return ret;
3202
3203         *got_output = ret;
3204         return 0;
3205     }
3206
3207     new_extradata = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA,
3208                                             &new_extradata_size);
3209     if (new_extradata && new_extradata_size > 0) {
3210         ret = hevc_decode_extradata(s, new_extradata, new_extradata_size, 0);
3211         if (ret < 0)
3212             return ret;
3213     }
3214
3215     s->ref = NULL;
3216     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
3217     if (ret < 0)
3218         return ret;
3219
3220     if (avctx->hwaccel) {
3221         if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) {
3222             av_log(avctx, AV_LOG_ERROR,
3223                    "hardware accelerator failed to decode picture\n");
3224             ff_hevc_unref_frame(s, s->ref, ~0);
3225             return ret;
3226         }
3227     } else {
3228         /* verify the SEI checksum */
3229         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
3230             s->sei.picture_hash.is_md5) {
3231             ret = verify_md5(s, s->ref->frame);
3232             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
3233                 ff_hevc_unref_frame(s, s->ref, ~0);
3234                 return ret;
3235             }
3236         }
3237     }
3238     s->sei.picture_hash.is_md5 = 0;
3239
3240     if (s->is_decoded) {
3241         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
3242         s->is_decoded = 0;
3243     }
3244
3245     if (s->output_frame->buf[0]) {
3246         av_frame_move_ref(data, s->output_frame);
3247         *got_output = 1;
3248     }
3249
3250     return avpkt->size;
3251 }
3252
3253 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
3254 {
3255     int ret;
3256
3257     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
3258     if (ret < 0)
3259         return ret;
3260
3261     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
3262     if (!dst->tab_mvf_buf)
3263         goto fail;
3264     dst->tab_mvf = src->tab_mvf;
3265
3266     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
3267     if (!dst->rpl_tab_buf)
3268         goto fail;
3269     dst->rpl_tab = src->rpl_tab;
3270
3271     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
3272     if (!dst->rpl_buf)
3273         goto fail;
3274
3275     dst->poc        = src->poc;
3276     dst->ctb_count  = src->ctb_count;
3277     dst->flags      = src->flags;
3278     dst->sequence   = src->sequence;
3279
3280     if (src->hwaccel_picture_private) {
3281         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
3282         if (!dst->hwaccel_priv_buf)
3283             goto fail;
3284         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
3285     }
3286
3287     return 0;
3288 fail:
3289     ff_hevc_unref_frame(s, dst, ~0);
3290     return AVERROR(ENOMEM);
3291 }
3292
3293 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3294 {
3295     HEVCContext       *s = avctx->priv_data;
3296     int i;
3297
3298     pic_arrays_free(s);
3299
3300     av_freep(&s->md5_ctx);
3301
3302     av_freep(&s->cabac_state);
3303
3304     for (i = 0; i < 3; i++) {
3305         av_freep(&s->sao_pixel_buffer_h[i]);
3306         av_freep(&s->sao_pixel_buffer_v[i]);
3307     }
3308     av_frame_free(&s->output_frame);
3309
3310     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3311         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3312         av_frame_free(&s->DPB[i].frame);
3313     }
3314
3315     ff_hevc_ps_uninit(&s->ps);
3316
3317     av_freep(&s->sh.entry_point_offset);
3318     av_freep(&s->sh.offset);
3319     av_freep(&s->sh.size);
3320
3321     for (i = 1; i < s->threads_number; i++) {
3322         HEVCLocalContext *lc = s->HEVClcList[i];
3323         if (lc) {
3324             av_freep(&s->HEVClcList[i]);
3325             av_freep(&s->sList[i]);
3326         }
3327     }
3328     if (s->HEVClc == s->HEVClcList[0])
3329         s->HEVClc = NULL;
3330     av_freep(&s->HEVClcList[0]);
3331
3332     ff_h2645_packet_uninit(&s->pkt);
3333
3334     return 0;
3335 }
3336
3337 static av_cold int hevc_init_context(AVCodecContext *avctx)
3338 {
3339     HEVCContext *s = avctx->priv_data;
3340     int i;
3341
3342     s->avctx = avctx;
3343
3344     s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3345     if (!s->HEVClc)
3346         goto fail;
3347     s->HEVClcList[0] = s->HEVClc;
3348     s->sList[0] = s;
3349
3350     s->cabac_state = av_malloc(HEVC_CONTEXTS);
3351     if (!s->cabac_state)
3352         goto fail;
3353
3354     s->output_frame = av_frame_alloc();
3355     if (!s->output_frame)
3356         goto fail;
3357
3358     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3359         s->DPB[i].frame = av_frame_alloc();
3360         if (!s->DPB[i].frame)
3361             goto fail;
3362         s->DPB[i].tf.f = s->DPB[i].frame;
3363     }
3364
3365     s->max_ra = INT_MAX;
3366
3367     s->md5_ctx = av_md5_alloc();
3368     if (!s->md5_ctx)
3369         goto fail;
3370
3371     ff_bswapdsp_init(&s->bdsp);
3372
3373     s->context_initialized = 1;
3374     s->eos = 0;
3375
3376     ff_hevc_reset_sei(&s->sei);
3377
3378     return 0;
3379
3380 fail:
3381     hevc_decode_free(avctx);
3382     return AVERROR(ENOMEM);
3383 }
3384
#if HAVE_THREADS
/**
 * Frame-threading callback: bring the context of dst up to date with the
 * state of src.
 *
 * Re-references the DPB and the VPS/SPS/PPS lists, activates the source SPS
 * when it differs, and copies the sequence counters and the selected SEI
 * state.
 *
 * @param dst codec context to update
 * @param src codec context to copy from
 * @return 0 on success, a negative error code on failure
 */
static int hevc_update_thread_context(AVCodecContext *dst,
                                      const AVCodecContext *src)
{
    HEVCContext *s  = dst->priv_data;
    HEVCContext *s0 = src->priv_data;
    int n, ret;

    if (!s->context_initialized) {
        ret = hevc_init_context(dst);
        if (ret < 0)
            return ret;
    }

    /* mirror the source DPB */
    for (n = 0; n < FF_ARRAY_ELEMS(s->DPB); n++) {
        ff_hevc_unref_frame(s, &s->DPB[n], ~0);
        if (!s0->DPB[n].frame->buf[0])
            continue;

        ret = hevc_ref_frame(s, &s->DPB[n], &s0->DPB[n]);
        if (ret < 0)
            return ret;
    }

    /* forget the active SPS when it differs from the source's, before the
     * parameter-set lists are replaced */
    if (s->ps.sps != s0->ps.sps)
        s->ps.sps = NULL;

    for (n = 0; n < FF_ARRAY_ELEMS(s->ps.vps_list); n++) {
        av_buffer_unref(&s->ps.vps_list[n]);
        if (!s0->ps.vps_list[n])
            continue;

        s->ps.vps_list[n] = av_buffer_ref(s0->ps.vps_list[n]);
        if (!s->ps.vps_list[n])
            return AVERROR(ENOMEM);
    }

    for (n = 0; n < FF_ARRAY_ELEMS(s->ps.sps_list); n++) {
        av_buffer_unref(&s->ps.sps_list[n]);
        if (!s0->ps.sps_list[n])
            continue;

        s->ps.sps_list[n] = av_buffer_ref(s0->ps.sps_list[n]);
        if (!s->ps.sps_list[n])
            return AVERROR(ENOMEM);
    }

    for (n = 0; n < FF_ARRAY_ELEMS(s->ps.pps_list); n++) {
        av_buffer_unref(&s->ps.pps_list[n]);
        if (!s0->ps.pps_list[n])
            continue;

        s->ps.pps_list[n] = av_buffer_ref(s0->ps.pps_list[n]);
        if (!s->ps.pps_list[n])
            return AVERROR(ENOMEM);
    }

    /* activate the source's SPS if it is not already the active one */
    if (s->ps.sps != s0->ps.sps) {
        ret = set_sps(s, s0->ps.sps, src->pix_fmt);
        if (ret < 0)
            return ret;
    }

    s->seq_decode          = s0->seq_decode;
    s->seq_output          = s0->seq_output;
    s->pocTid0             = s0->pocTid0;
    s->max_ra              = s0->max_ra;
    s->eos                 = s0->eos;
    s->no_rasl_output_flag = s0->no_rasl_output_flag;

    s->is_nalff        = s0->is_nalff;
    s->nal_length_size = s0->nal_length_size;

    s->threads_number = s0->threads_number;
    s->threads_type   = s0->threads_type;

    /* the source ended a sequence: advance the sequence counter and reset
     * max_ra, as the EOS/EOB case of decode_nal_unit() does */
    if (s0->eos) {
        s->seq_decode = (s->seq_decode + 1) & 0xff;
        s->max_ra     = INT_MAX;
    }

    s->sei.frame_packing        = s0->sei.frame_packing;
    s->sei.display_orientation  = s0->sei.display_orientation;
    s->sei.mastering_display    = s0->sei.mastering_display;
    s->sei.content_light        = s0->sei.content_light;
    s->sei.alternative_transfer = s0->sei.alternative_transfer;

    return 0;
}
#endif
3468
3469 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3470 {
3471     HEVCContext *s = avctx->priv_data;
3472     int ret;
3473
3474     avctx->internal->allocate_progress = 1;
3475
3476     ret = hevc_init_context(avctx);
3477     if (ret < 0)
3478         return ret;
3479
3480     s->enable_parallel_tiles = 0;
3481     s->sei.picture_timing.picture_struct = 0;
3482     s->eos = 1;
3483
3484     atomic_init(&s->wpp_err, 0);
3485
3486     if(avctx->active_thread_type & FF_THREAD_SLICE)
3487         s->threads_number = avctx->thread_count;
3488     else
3489         s->threads_number = 1;
3490
3491     if (avctx->extradata_size > 0 && avctx->extradata) {
3492         ret = hevc_decode_extradata(s, avctx->extradata, avctx->extradata_size, 1);
3493         if (ret < 0) {
3494             hevc_decode_free(avctx);
3495             return ret;
3496         }
3497     }
3498
3499     if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3500             s->threads_type = FF_THREAD_FRAME;
3501         else
3502             s->threads_type = FF_THREAD_SLICE;
3503
3504     return 0;
3505 }
3506
#if HAVE_THREADS
/**
 * Frame-threading callback: initialise the private context of a thread copy.
 *
 * The context is zeroed first and then set up from scratch.
 *
 * @param avctx codec context of the thread copy
 * @return 0 on success, a negative error code on failure
 */
static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
{
    HEVCContext *s = avctx->priv_data;
    int err;

    memset(s, 0, sizeof(*s));

    err = hevc_init_context(avctx);

    return err < 0 ? err : 0;
}
#endif
3522
3523 static void hevc_decode_flush(AVCodecContext *avctx)
3524 {
3525     HEVCContext *s = avctx->priv_data;
3526     ff_hevc_flush_dpb(s);
3527     s->max_ra = INT_MAX;
3528     s->eos = 1;
3529 }
3530
3531 #define OFFSET(x) offsetof(HEVCContext, x)
3532 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3533
3534 static const AVOption options[] = {
3535     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3536         AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3537     { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3538         AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3539     { NULL },
3540 };
3541
3542 static const AVClass hevc_decoder_class = {
3543     .class_name = "HEVC decoder",
3544     .item_name  = av_default_item_name,
3545     .option     = options,
3546     .version    = LIBAVUTIL_VERSION_INT,
3547 };
3548
3549 AVCodec ff_hevc_decoder = {
3550     .name                  = "hevc",
3551     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3552     .type                  = AVMEDIA_TYPE_VIDEO,
3553     .id                    = AV_CODEC_ID_HEVC,
3554     .priv_data_size        = sizeof(HEVCContext),
3555     .priv_class            = &hevc_decoder_class,
3556     .init                  = hevc_decode_init,
3557     .close                 = hevc_decode_free,
3558     .decode                = hevc_decode_frame,
3559     .flush                 = hevc_decode_flush,
3560     .update_thread_context = ONLY_IF_THREADS_ENABLED(hevc_update_thread_context),
3561     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(hevc_init_thread_copy),
3562     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
3563                              AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
3564     .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_EXPORTS_CROPPING,
3565     .profiles              = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
3566     .hw_configs            = (const AVCodecHWConfigInternal*[]) {
3567 #if CONFIG_HEVC_DXVA2_HWACCEL
3568                                HWACCEL_DXVA2(hevc),
3569 #endif
3570 #if CONFIG_HEVC_D3D11VA_HWACCEL
3571                                HWACCEL_D3D11VA(hevc),
3572 #endif
3573 #if CONFIG_HEVC_D3D11VA2_HWACCEL
3574                                HWACCEL_D3D11VA2(hevc),
3575 #endif
3576 #if CONFIG_HEVC_NVDEC_HWACCEL
3577                                HWACCEL_NVDEC(hevc),
3578 #endif
3579 #if CONFIG_HEVC_VAAPI_HWACCEL
3580                                HWACCEL_VAAPI(hevc),
3581 #endif
3582 #if CONFIG_HEVC_VDPAU_HWACCEL
3583                                HWACCEL_VDPAU(hevc),
3584 #endif
3585 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
3586                                HWACCEL_VIDEOTOOLBOX(hevc),
3587 #endif
3588                                NULL
3589                            },
3590 };