git.sesse.net Git - ffmpeg/blob - libavcodec/hevcdec.c

   1 /*
   2  * HEVC video Decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of FFmpeg.
  10  *
  11  * FFmpeg is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * FFmpeg is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with FFmpeg; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/attributes.h"
  27 #include "libavutil/common.h"
  28 #include "libavutil/display.h"
  29 #include "libavutil/internal.h"
  30 #include "libavutil/mastering_display_metadata.h"
  31 #include "libavutil/md5.h"
  32 #include "libavutil/opt.h"
  33 #include "libavutil/pixdesc.h"
  34 #include "libavutil/stereo3d.h"
  35
  36 #include "bswapdsp.h"
  37 #include "bytestream.h"
  38 #include "cabac_functions.h"
  39 #include "golomb.h"
  40 #include "hevc.h"
  41 #include "hevc_data.h"
  42 #include "hevc_parse.h"
  43 #include "hevcdec.h"
  44 #include "hwaccel.h"
  45 #include "profiles.h"
  46
  47 const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
  48
  49 /**
   50  * NOTE: Each function hls_foo corresponds to the function foo in the
  51  * specification (HLS stands for High Level Syntax).
  52  */
  53
  54 /**
  55  * Section 5.7
  56  */
  57
  58 /* free everything allocated  by pic_arrays_init() */
  59 static void pic_arrays_free(HEVCContext *s)
  60 {
  61     av_freep(&s->sao);
  62     av_freep(&s->deblock);
  63
  64     av_freep(&s->skip_flag);
  65     av_freep(&s->tab_ct_depth);
  66
  67     av_freep(&s->tab_ipm);
  68     av_freep(&s->cbf_luma);
  69     av_freep(&s->is_pcm);
  70
  71     av_freep(&s->qp_y_tab);
  72     av_freep(&s->tab_slice_address);
  73     av_freep(&s->filter_slice_edges);
  74
  75     av_freep(&s->horizontal_bs);
  76     av_freep(&s->vertical_bs);
  77
  78     av_freep(&s->sh.entry_point_offset);
  79     av_freep(&s->sh.size);
  80     av_freep(&s->sh.offset);
  81
  82     av_buffer_pool_uninit(&s->tab_mvf_pool);
  83     av_buffer_pool_uninit(&s->rpl_tab_pool);
  84 }
  85
  86 /* allocate arrays that depend on frame dimensions */
  87 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
  88 {
  89     int log2_min_cb_size = sps->log2_min_cb_size;
  90     int width            = sps->width;
  91     int height           = sps->height;
  92     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
  93                            ((height >> log2_min_cb_size) + 1);
  94     int ctb_count        = sps->ctb_width * sps->ctb_height;
  95     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
  96
  97     s->bs_width  = (width  >> 2) + 1;
  98     s->bs_height = (height >> 2) + 1;
  99
 100     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
 101     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
 102     if (!s->sao || !s->deblock)
 103         goto fail;
 104
 105     s->skip_flag    = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
 106     s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
 107     if (!s->skip_flag || !s->tab_ct_depth)
 108         goto fail;
 109
 110     s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
 111     s->tab_ipm  = av_mallocz(min_pu_size);
 112     s->is_pcm   = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
 113     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 114         goto fail;
 115
 116     s->filter_slice_edges = av_mallocz(ctb_count);
 117     s->tab_slice_address  = av_malloc_array(pic_size_in_ctb,
 118                                       sizeof(*s->tab_slice_address));
 119     s->qp_y_tab           = av_malloc_array(pic_size_in_ctb,
 120                                       sizeof(*s->qp_y_tab));
 121     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 122         goto fail;
 123
 124     s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
 125     s->vertical_bs   = av_mallocz_array(s->bs_width, s->bs_height);
 126     if (!s->horizontal_bs || !s->vertical_bs)
 127         goto fail;
 128
 129     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 130                                           av_buffer_allocz);
 131     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 132                                           av_buffer_allocz);
 133     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 134         goto fail;
 135
 136     return 0;
 137
 138 fail:
 139     pic_arrays_free(s);
 140     return AVERROR(ENOMEM);
 141 }
 142
 143 static int pred_weight_table(HEVCContext *s, GetBitContext *gb)
 144 {
 145     int i = 0;
 146     int j = 0;
 147     uint8_t luma_weight_l0_flag[16];
 148     uint8_t chroma_weight_l0_flag[16];
 149     uint8_t luma_weight_l1_flag[16];
 150     uint8_t chroma_weight_l1_flag[16];
 151     int luma_log2_weight_denom;
 152
 153     luma_log2_weight_denom = get_ue_golomb_long(gb);
 154     if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7) {
 155         av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
 156         return AVERROR_INVALIDDATA;
 157     }
 158     s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
 159     if (s->ps.sps->chroma_format_idc != 0) {
 160         int64_t chroma_log2_weight_denom = luma_log2_weight_denom + (int64_t)get_se_golomb(gb);
 161         if (chroma_log2_weight_denom < 0 || chroma_log2_weight_denom > 7) {
 162             av_log(s->avctx, AV_LOG_ERROR, "chroma_log2_weight_denom %"PRId64" is invalid\n", chroma_log2_weight_denom);
 163             return AVERROR_INVALIDDATA;
 164         }
 165         s->sh.chroma_log2_weight_denom = chroma_log2_weight_denom;
 166     }
 167
 168     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 169         luma_weight_l0_flag[i] = get_bits1(gb);
 170         if (!luma_weight_l0_flag[i]) {
 171             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 172             s->sh.luma_offset_l0[i] = 0;
 173         }
 174     }
 175     if (s->ps.sps->chroma_format_idc != 0) {
 176         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 177             chroma_weight_l0_flag[i] = get_bits1(gb);
 178     } else {
 179         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 180             chroma_weight_l0_flag[i] = 0;
 181     }
 182     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 183         if (luma_weight_l0_flag[i]) {
 184             int delta_luma_weight_l0 = get_se_golomb(gb);
 185             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 186             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 187         }
 188         if (chroma_weight_l0_flag[i]) {
 189             for (j = 0; j < 2; j++) {
 190                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 191                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 192
 193                 if (   (int8_t)delta_chroma_weight_l0 != delta_chroma_weight_l0
 194                     || delta_chroma_offset_l0 < -(1<<17) || delta_chroma_offset_l0 > (1<<17)) {
 195                     return AVERROR_INVALIDDATA;
 196                 }
 197
 198                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 199                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 200                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 201             }
 202         } else {
 203             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 204             s->sh.chroma_offset_l0[i][0] = 0;
 205             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 206             s->sh.chroma_offset_l0[i][1] = 0;
 207         }
 208     }
 209     if (s->sh.slice_type == HEVC_SLICE_B) {
 210         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 211             luma_weight_l1_flag[i] = get_bits1(gb);
 212             if (!luma_weight_l1_flag[i]) {
 213                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 214                 s->sh.luma_offset_l1[i] = 0;
 215             }
 216         }
 217         if (s->ps.sps->chroma_format_idc != 0) {
 218             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 219                 chroma_weight_l1_flag[i] = get_bits1(gb);
 220         } else {
 221             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 222                 chroma_weight_l1_flag[i] = 0;
 223         }
 224         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 225             if (luma_weight_l1_flag[i]) {
 226                 int delta_luma_weight_l1 = get_se_golomb(gb);
 227                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 228                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 229             }
 230             if (chroma_weight_l1_flag[i]) {
 231                 for (j = 0; j < 2; j++) {
 232                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 233                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 234
 235                     if (   (int8_t)delta_chroma_weight_l1 != delta_chroma_weight_l1
 236                         || delta_chroma_offset_l1 < -(1<<17) || delta_chroma_offset_l1 > (1<<17)) {
 237                         return AVERROR_INVALIDDATA;
 238                     }
 239
 240                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 241                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 242                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 243                 }
 244             } else {
 245                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 246                 s->sh.chroma_offset_l1[i][0] = 0;
 247                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 248                 s->sh.chroma_offset_l1[i][1] = 0;
 249             }
 250         }
 251     }
 252     return 0;
 253 }
 254
 255 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 256 {
 257     const HEVCSPS *sps = s->ps.sps;
 258     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 259     int prev_delta_msb = 0;
 260     unsigned int nb_sps = 0, nb_sh;
 261     int i;
 262
 263     rps->nb_refs = 0;
 264     if (!sps->long_term_ref_pics_present_flag)
 265         return 0;
 266
 267     if (sps->num_long_term_ref_pics_sps > 0)
 268         nb_sps = get_ue_golomb_long(gb);
 269     nb_sh = get_ue_golomb_long(gb);
 270
 271     if (nb_sps > sps->num_long_term_ref_pics_sps)
 272         return AVERROR_INVALIDDATA;
 273     if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
 274         return AVERROR_INVALIDDATA;
 275
 276     rps->nb_refs = nb_sh + nb_sps;
 277
 278     for (i = 0; i < rps->nb_refs; i++) {
 279         uint8_t delta_poc_msb_present;
 280
 281         if (i < nb_sps) {
 282             uint8_t lt_idx_sps = 0;
 283
 284             if (sps->num_long_term_ref_pics_sps > 1)
 285                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 286
 287             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 288             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 289         } else {
 290             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 291             rps->used[i] = get_bits1(gb);
 292         }
 293
 294         delta_poc_msb_present = get_bits1(gb);
 295         if (delta_poc_msb_present) {
 296             int64_t delta = get_ue_golomb_long(gb);
 297             int64_t poc;
 298
 299             if (i && i != nb_sps)
 300                 delta += prev_delta_msb;
 301
 302             poc = rps->poc[i] + s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 303             if (poc != (int32_t)poc)
 304                 return AVERROR_INVALIDDATA;
 305             rps->poc[i] = poc;
 306             prev_delta_msb = delta;
 307         }
 308     }
 309
 310     return 0;
 311 }
 312
 313 static void export_stream_params(HEVCContext *s, const HEVCSPS *sps)
 314 {
 315     AVCodecContext *avctx = s->avctx;
 316     const HEVCParamSets *ps = &s->ps;
 317     const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
 318     const HEVCWindow *ow = &sps->output_window;
 319     unsigned int num = 0, den = 0;
 320
 321     avctx->pix_fmt             = sps->pix_fmt;
 322     avctx->coded_width         = sps->width;
 323     avctx->coded_height        = sps->height;
 324     avctx->width               = sps->width  - ow->left_offset - ow->right_offset;
 325     avctx->height              = sps->height - ow->top_offset  - ow->bottom_offset;
 326     avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 327     avctx->profile             = sps->ptl.general_ptl.profile_idc;
 328     avctx->level               = sps->ptl.general_ptl.level_idc;
 329
 330     ff_set_sar(avctx, sps->vui.sar);
 331
 332     if (sps->vui.video_signal_type_present_flag)
 333         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 334                                                             : AVCOL_RANGE_MPEG;
 335     else
 336         avctx->color_range = AVCOL_RANGE_MPEG;
 337
 338     if (sps->vui.colour_description_present_flag) {
 339         avctx->color_primaries = sps->vui.colour_primaries;
 340         avctx->color_trc       = sps->vui.transfer_characteristic;
 341         avctx->colorspace      = sps->vui.matrix_coeffs;
 342     } else {
 343         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 344         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 345         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 346     }
 347
 348     if (vps->vps_timing_info_present_flag) {
 349         num = vps->vps_num_units_in_tick;
 350         den = vps->vps_time_scale;
 351     } else if (sps->vui.vui_timing_info_present_flag) {
 352         num = sps->vui.vui_num_units_in_tick;
 353         den = sps->vui.vui_time_scale;
 354     }
 355
 356     if (num != 0 && den != 0)
 357         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
 358                   num, den, 1 << 30);
 359
 360     if (s->sei.alternative_transfer.present &&
 361         av_color_transfer_name(s->sei.alternative_transfer.preferred_transfer_characteristics) &&
 362         s->sei.alternative_transfer.preferred_transfer_characteristics != AVCOL_TRC_UNSPECIFIED) {
 363         avctx->color_trc = s->sei.alternative_transfer.preferred_transfer_characteristics;
 364     }
 365 }
 366
 367 static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
 368 {
 369 #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + \
 370                      CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \
 371                      CONFIG_HEVC_NVDEC_HWACCEL + \
 372                      CONFIG_HEVC_VAAPI_HWACCEL + \
 373                      CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \
 374                      CONFIG_HEVC_VDPAU_HWACCEL)
 375     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
 376
 377     switch (sps->pix_fmt) {
 378     case AV_PIX_FMT_YUV420P:
 379     case AV_PIX_FMT_YUVJ420P:
 380 #if CONFIG_HEVC_DXVA2_HWACCEL
 381         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
 382 #endif
 383 #if CONFIG_HEVC_D3D11VA_HWACCEL
 384         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
 385         *fmt++ = AV_PIX_FMT_D3D11;
 386 #endif
 387 #if CONFIG_HEVC_VAAPI_HWACCEL
 388         *fmt++ = AV_PIX_FMT_VAAPI;
 389 #endif
 390 #if CONFIG_HEVC_VDPAU_HWACCEL
 391         *fmt++ = AV_PIX_FMT_VDPAU;
 392 #endif
 393 #if CONFIG_HEVC_NVDEC_HWACCEL
 394         *fmt++ = AV_PIX_FMT_CUDA;
 395 #endif
 396 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
 397         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
 398 #endif
 399         break;
 400     case AV_PIX_FMT_YUV420P10:
 401 #if CONFIG_HEVC_DXVA2_HWACCEL
 402         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
 403 #endif
 404 #if CONFIG_HEVC_D3D11VA_HWACCEL
 405         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
 406         *fmt++ = AV_PIX_FMT_D3D11;
 407 #endif
 408 #if CONFIG_HEVC_VAAPI_HWACCEL
 409         *fmt++ = AV_PIX_FMT_VAAPI;
 410 #endif
 411 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
 412         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
 413 #endif
 414 #if CONFIG_HEVC_NVDEC_HWACCEL
 415         *fmt++ = AV_PIX_FMT_CUDA;
 416 #endif
 417         break;
 418     case AV_PIX_FMT_YUV444P:
 419 #if CONFIG_HEVC_VDPAU_HWACCEL
 420         *fmt++ = AV_PIX_FMT_VDPAU;
 421 #endif
 422 #if CONFIG_HEVC_NVDEC_HWACCEL
 423         *fmt++ = AV_PIX_FMT_CUDA;
 424 #endif
 425         break;
 426     case AV_PIX_FMT_YUV420P12:
 427     case AV_PIX_FMT_YUV444P10:
 428     case AV_PIX_FMT_YUV444P12:
 429 #if CONFIG_HEVC_NVDEC_HWACCEL
 430         *fmt++ = AV_PIX_FMT_CUDA;
 431 #endif
 432         break;
 433     }
 434
 435     *fmt++ = sps->pix_fmt;
 436     *fmt = AV_PIX_FMT_NONE;
 437
 438     return ff_thread_get_format(s->avctx, pix_fmts);
 439 }
 440
 441 static int set_sps(HEVCContext *s, const HEVCSPS *sps,
 442                    enum AVPixelFormat pix_fmt)
 443 {
 444     int ret, i;
 445
 446     pic_arrays_free(s);
 447     s->ps.sps = NULL;
 448     s->ps.vps = NULL;
 449
 450     if (!sps)
 451         return 0;
 452
 453     ret = pic_arrays_init(s, sps);
 454     if (ret < 0)
 455         goto fail;
 456
 457     export_stream_params(s, sps);
 458
 459     s->avctx->pix_fmt = pix_fmt;
 460
 461     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 462     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 463     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 464
 465     for (i = 0; i < 3; i++) {
 466         av_freep(&s->sao_pixel_buffer_h[i]);
 467         av_freep(&s->sao_pixel_buffer_v[i]);
 468     }
 469
 470     if (sps->sao_enabled && !s->avctx->hwaccel) {
 471         int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
 472         int c_idx;
 473
 474         for(c_idx = 0; c_idx < c_count; c_idx++) {
 475             int w = sps->width >> sps->hshift[c_idx];
 476             int h = sps->height >> sps->vshift[c_idx];
 477             s->sao_pixel_buffer_h[c_idx] =
 478                 av_malloc((w * 2 * sps->ctb_height) <<
 479                           sps->pixel_shift);
 480             s->sao_pixel_buffer_v[c_idx] =
 481                 av_malloc((h * 2 * sps->ctb_width) <<
 482                           sps->pixel_shift);
 483         }
 484     }
 485
 486     s->ps.sps = sps;
 487     s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
 488
 489     return 0;
 490
 491 fail:
 492     pic_arrays_free(s);
 493     s->ps.sps = NULL;
 494     return ret;
 495 }
 496
 497 static int hls_slice_header(HEVCContext *s)
 498 {
 499     GetBitContext *gb = &s->HEVClc->gb;
 500     SliceHeader *sh   = &s->sh;
 501     int i, ret;
 502
 503     // Coded parameters
 504     sh->first_slice_in_pic_flag = get_bits1(gb);
 505     if (s->ref && sh->first_slice_in_pic_flag) {
 506         av_log(s->avctx, AV_LOG_ERROR, "Two slices reporting being the first in the same frame.\n");
 507         return 1; // This slice will be skiped later, do not corrupt state
 508     }
 509
 510     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 511         s->seq_decode = (s->seq_decode + 1) & 0xff;
 512         s->max_ra     = INT_MAX;
 513         if (IS_IDR(s))
 514             ff_hevc_clear_refs(s);
 515     }
 516     sh->no_output_of_prior_pics_flag = 0;
 517     if (IS_IRAP(s))
 518         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 519
 520     sh->pps_id = get_ue_golomb_long(gb);
 521     if (sh->pps_id >= HEVC_MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
 522         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 523         return AVERROR_INVALIDDATA;
 524     }
 525     if (!sh->first_slice_in_pic_flag &&
 526         s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
 527         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 528         return AVERROR_INVALIDDATA;
 529     }
 530     s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
 531     if (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos == 1)
 532         sh->no_output_of_prior_pics_flag = 1;
 533
 534     if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
 535         const HEVCSPS *sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
 536         const HEVCSPS *last_sps = s->ps.sps;
 537         enum AVPixelFormat pix_fmt;
 538
 539         if (last_sps && IS_IRAP(s) && s->nal_unit_type != HEVC_NAL_CRA_NUT) {
 540             if (sps->width != last_sps->width || sps->height != last_sps->height ||
 541                 sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering !=
 542                 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
 543                 sh->no_output_of_prior_pics_flag = 0;
 544         }
 545         ff_hevc_clear_refs(s);
 546
 547         ret = set_sps(s, sps, sps->pix_fmt);
 548         if (ret < 0)
 549             return ret;
 550
 551         pix_fmt = get_format(s, sps);
 552         if (pix_fmt < 0)
 553             return pix_fmt;
 554         s->avctx->pix_fmt = pix_fmt;
 555
 556         s->seq_decode = (s->seq_decode + 1) & 0xff;
 557         s->max_ra     = INT_MAX;
 558     }
 559
 560     sh->dependent_slice_segment_flag = 0;
 561     if (!sh->first_slice_in_pic_flag) {
 562         int slice_address_length;
 563
 564         if (s->ps.pps->dependent_slice_segments_enabled_flag)
 565             sh->dependent_slice_segment_flag = get_bits1(gb);
 566
 567         slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
 568                                             s->ps.sps->ctb_height);
 569         sh->slice_segment_addr = get_bitsz(gb, slice_address_length);
 570         if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
 571             av_log(s->avctx, AV_LOG_ERROR,
 572                    "Invalid slice segment address: %u.\n",
 573                    sh->slice_segment_addr);
 574             return AVERROR_INVALIDDATA;
 575         }
 576
 577         if (!sh->dependent_slice_segment_flag) {
 578             sh->slice_addr = sh->slice_segment_addr;
 579             s->slice_idx++;
 580         }
 581     } else {
 582         sh->slice_segment_addr = sh->slice_addr = 0;
 583         s->slice_idx           = 0;
 584         s->slice_initialized   = 0;
 585     }
 586
 587     if (!sh->dependent_slice_segment_flag) {
 588         s->slice_initialized = 0;
 589
 590         for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
 591             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 592
 593         sh->slice_type = get_ue_golomb_long(gb);
 594         if (!(sh->slice_type == HEVC_SLICE_I ||
 595               sh->slice_type == HEVC_SLICE_P ||
 596               sh->slice_type == HEVC_SLICE_B)) {
 597             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 598                    sh->slice_type);
 599             return AVERROR_INVALIDDATA;
 600         }
 601         if (IS_IRAP(s) && sh->slice_type != HEVC_SLICE_I) {
 602             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 603             return AVERROR_INVALIDDATA;
 604         }
 605
 606         // when flag is not present, picture is inferred to be output
 607         sh->pic_output_flag = 1;
 608         if (s->ps.pps->output_flag_present_flag)
 609             sh->pic_output_flag = get_bits1(gb);
 610
 611         if (s->ps.sps->separate_colour_plane_flag)
 612             sh->colour_plane_id = get_bits(gb, 2);
 613
 614         if (!IS_IDR(s)) {
 615             int poc, pos;
 616
 617             sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
 618             poc = ff_hevc_compute_poc(s->ps.sps, s->pocTid0, sh->pic_order_cnt_lsb, s->nal_unit_type);
 619             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 620                 av_log(s->avctx, AV_LOG_WARNING,
 621                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 622                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 623                     return AVERROR_INVALIDDATA;
 624                 poc = s->poc;
 625             }
 626             s->poc = poc;
 627
 628             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
 629             pos = get_bits_left(gb);
 630             if (!sh->short_term_ref_pic_set_sps_flag) {
 631                 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
 632                 if (ret < 0)
 633                     return ret;
 634
 635                 sh->short_term_rps = &sh->slice_rps;
 636             } else {
 637                 int numbits, rps_idx;
 638
 639                 if (!s->ps.sps->nb_st_rps) {
 640                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 641                     return AVERROR_INVALIDDATA;
 642                 }
 643
 644                 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
 645                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 646                 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
 647             }
 648             sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
 649
 650             pos = get_bits_left(gb);
 651             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 652             if (ret < 0) {
 653                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 654                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 655                     return AVERROR_INVALIDDATA;
 656             }
 657             sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
 658
 659             if (s->ps.sps->sps_temporal_mvp_enabled_flag)
 660                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 661             else
 662                 sh->slice_temporal_mvp_enabled_flag = 0;
 663         } else {
 664             s->sh.short_term_rps = NULL;
 665             s->poc               = 0;
 666         }
 667
 668         /* 8.3.1 */
 669         if (sh->first_slice_in_pic_flag && s->temporal_id == 0 &&
 670             s->nal_unit_type != HEVC_NAL_TRAIL_N &&
 671             s->nal_unit_type != HEVC_NAL_TSA_N   &&
 672             s->nal_unit_type != HEVC_NAL_STSA_N  &&
 673             s->nal_unit_type != HEVC_NAL_RADL_N  &&
 674             s->nal_unit_type != HEVC_NAL_RADL_R  &&
 675             s->nal_unit_type != HEVC_NAL_RASL_N  &&
 676             s->nal_unit_type != HEVC_NAL_RASL_R)
 677             s->pocTid0 = s->poc;
 678
 679         if (s->ps.sps->sao_enabled) {
 680             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 681             if (s->ps.sps->chroma_format_idc) {
 682                 sh->slice_sample_adaptive_offset_flag[1] =
 683                 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 684             }
 685         } else {
 686             sh->slice_sample_adaptive_offset_flag[0] = 0;
 687             sh->slice_sample_adaptive_offset_flag[1] = 0;
 688             sh->slice_sample_adaptive_offset_flag[2] = 0;
 689         }
 690
 691         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 692         if (sh->slice_type == HEVC_SLICE_P || sh->slice_type == HEVC_SLICE_B) {
 693             int nb_refs;
 694
 695             sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
 696             if (sh->slice_type == HEVC_SLICE_B)
 697                 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
 698
 699             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 700                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 701                 if (sh->slice_type == HEVC_SLICE_B)
 702                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 703             }
 704             if (sh->nb_refs[L0] > HEVC_MAX_REFS || sh->nb_refs[L1] > HEVC_MAX_REFS) {
 705                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 706                        sh->nb_refs[L0], sh->nb_refs[L1]);
 707                 return AVERROR_INVALIDDATA;
 708             }
 709
 710             sh->rpl_modification_flag[0] = 0;
 711             sh->rpl_modification_flag[1] = 0;
 712             nb_refs = ff_hevc_frame_nb_refs(s);
 713             if (!nb_refs) {
 714                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 715                 return AVERROR_INVALIDDATA;
 716             }
 717
 718             if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
 719                 sh->rpl_modification_flag[0] = get_bits1(gb);
 720                 if (sh->rpl_modification_flag[0]) {
 721                     for (i = 0; i < sh->nb_refs[L0]; i++)
 722                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 723                 }
 724
 725                 if (sh->slice_type == HEVC_SLICE_B) {
 726                     sh->rpl_modification_flag[1] = get_bits1(gb);
 727                     if (sh->rpl_modification_flag[1] == 1)
 728                         for (i = 0; i < sh->nb_refs[L1]; i++)
 729                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 730                 }
 731             }
 732
 733             if (sh->slice_type == HEVC_SLICE_B)
 734                 sh->mvd_l1_zero_flag = get_bits1(gb);
 735
 736             if (s->ps.pps->cabac_init_present_flag)
 737                 sh->cabac_init_flag = get_bits1(gb);
 738             else
 739                 sh->cabac_init_flag = 0;
 740
 741             sh->collocated_ref_idx = 0;
 742             if (sh->slice_temporal_mvp_enabled_flag) {
 743                 sh->collocated_list = L0;
 744                 if (sh->slice_type == HEVC_SLICE_B)
 745                     sh->collocated_list = !get_bits1(gb);
 746
 747                 if (sh->nb_refs[sh->collocated_list] > 1) {
 748                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 749                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 750                         av_log(s->avctx, AV_LOG_ERROR,
 751                                "Invalid collocated_ref_idx: %d.\n",
 752                                sh->collocated_ref_idx);
 753                         return AVERROR_INVALIDDATA;
 754                     }
 755                 }
 756             }
 757
 758             if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == HEVC_SLICE_P) ||
 759                 (s->ps.pps->weighted_bipred_flag && sh->slice_type == HEVC_SLICE_B)) {
 760                 int ret = pred_weight_table(s, gb);
 761                 if (ret < 0)
 762                     return ret;
 763             }
 764
 765             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 766             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 767                 av_log(s->avctx, AV_LOG_ERROR,
 768                        "Invalid number of merging MVP candidates: %d.\n",
 769                        sh->max_num_merge_cand);
 770                 return AVERROR_INVALIDDATA;
 771             }
 772         }
 773
 774         sh->slice_qp_delta = get_se_golomb(gb);
 775
 776         if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 777             sh->slice_cb_qp_offset = get_se_golomb(gb);
 778             sh->slice_cr_qp_offset = get_se_golomb(gb);
 779         } else {
 780             sh->slice_cb_qp_offset = 0;
 781             sh->slice_cr_qp_offset = 0;
 782         }
 783
 784         if (s->ps.pps->chroma_qp_offset_list_enabled_flag)
 785             sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
 786         else
 787             sh->cu_chroma_qp_offset_enabled_flag = 0;
 788
 789         if (s->ps.pps->deblocking_filter_control_present_flag) {
 790             int deblocking_filter_override_flag = 0;
 791
 792             if (s->ps.pps->deblocking_filter_override_enabled_flag)
 793                 deblocking_filter_override_flag = get_bits1(gb);
 794
 795             if (deblocking_filter_override_flag) {
 796                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 797                 if (!sh->disable_deblocking_filter_flag) {
 798                     int beta_offset_div2 = get_se_golomb(gb);
 799                     int tc_offset_div2   = get_se_golomb(gb) ;
 800                     if (beta_offset_div2 < -6 || beta_offset_div2 > 6 ||
 801                         tc_offset_div2   < -6 || tc_offset_div2   > 6) {
 802                         av_log(s->avctx, AV_LOG_ERROR,
 803                             "Invalid deblock filter offsets: %d, %d\n",
 804                             beta_offset_div2, tc_offset_div2);
 805                         return AVERROR_INVALIDDATA;
 806                     }
 807                     sh->beta_offset = beta_offset_div2 * 2;
 808                     sh->tc_offset   =   tc_offset_div2 * 2;
 809                 }
 810             } else {
 811                 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
 812                 sh->beta_offset                    = s->ps.pps->beta_offset;
 813                 sh->tc_offset                      = s->ps.pps->tc_offset;
 814             }
 815         } else {
 816             sh->disable_deblocking_filter_flag = 0;
 817             sh->beta_offset                    = 0;
 818             sh->tc_offset                      = 0;
 819         }
 820
 821         if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
 822             (sh->slice_sample_adaptive_offset_flag[0] ||
 823              sh->slice_sample_adaptive_offset_flag[1] ||
 824              !sh->disable_deblocking_filter_flag)) {
 825             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 826         } else {
 827             sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
 828         }
 829     } else if (!s->slice_initialized) {
 830         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 831         return AVERROR_INVALIDDATA;
 832     }
 833
 834     sh->num_entry_point_offsets = 0;
 835     if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
 836         unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
 837         // It would be possible to bound this tighter but this here is simpler
 838         if (num_entry_point_offsets > get_bits_left(gb)) {
 839             av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
 840             return AVERROR_INVALIDDATA;
 841         }
 842
 843         sh->num_entry_point_offsets = num_entry_point_offsets;
 844         if (sh->num_entry_point_offsets > 0) {
 845             int offset_len = get_ue_golomb_long(gb) + 1;
 846
 847             if (offset_len < 1 || offset_len > 32) {
 848                 sh->num_entry_point_offsets = 0;
 849                 av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
 850                 return AVERROR_INVALIDDATA;
 851             }
 852
 853             av_freep(&sh->entry_point_offset);
 854             av_freep(&sh->offset);
 855             av_freep(&sh->size);
 856             sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(unsigned));
 857             sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 858             sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 859             if (!sh->entry_point_offset || !sh->offset || !sh->size) {
 860                 sh->num_entry_point_offsets = 0;
 861                 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
 862                 return AVERROR(ENOMEM);
 863             }
 864             for (i = 0; i < sh->num_entry_point_offsets; i++) {
 865                 unsigned val = get_bits_long(gb, offset_len);
 866                 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
 867             }
 868             if (s->threads_number > 1 && (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1)) {
 869                 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
 870                 s->threads_number = 1;
 871             } else
 872                 s->enable_parallel_tiles = 0;
 873         } else
 874             s->enable_parallel_tiles = 0;
 875     }
 876
 877     if (s->ps.pps->slice_header_extension_present_flag) {
 878         unsigned int length = get_ue_golomb_long(gb);
 879         if (length*8LL > get_bits_left(gb)) {
 880             av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
 881             return AVERROR_INVALIDDATA;
 882         }
 883         for (i = 0; i < length; i++)
 884             skip_bits(gb, 8);  // slice_header_extension_data_byte
 885     }
 886
 887     // Inferred parameters
 888     sh->slice_qp = 26U + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 889     if (sh->slice_qp > 51 ||
 890         sh->slice_qp < -s->ps.sps->qp_bd_offset) {
 891         av_log(s->avctx, AV_LOG_ERROR,
 892                "The slice_qp %d is outside the valid range "
 893                "[%d, 51].\n",
 894                sh->slice_qp,
 895                -s->ps.sps->qp_bd_offset);
 896         return AVERROR_INVALIDDATA;
 897     }
 898
 899     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 900
 901     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
 902         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
 903         return AVERROR_INVALIDDATA;
 904     }
 905
 906     if (get_bits_left(gb) < 0) {
 907         av_log(s->avctx, AV_LOG_ERROR,
 908                "Overread slice header by %d bits\n", -get_bits_left(gb));
 909         return AVERROR_INVALIDDATA;
 910     }
 911
 912     s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
 913
 914     if (!s->ps.pps->cu_qp_delta_enabled_flag)
 915         s->HEVClc->qp_y = s->sh.slice_qp;
 916
 917     s->slice_initialized = 1;
 918     s->HEVClc->tu.cu_qp_offset_cb = 0;
 919     s->HEVClc->tu.cu_qp_offset_cr = 0;
 920
 921     return 0;
 922 }
 923
 924 #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
 925
 926 #define SET_SAO(elem, value)                            \
 927 do {                                                    \
 928     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 929         sao->elem = value;                              \
 930     else if (sao_merge_left_flag)                       \
 931         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 932     else if (sao_merge_up_flag)                         \
 933         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 934     else                                                \
 935         sao->elem = 0;                                  \
 936 } while (0)
 937
 938 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 939 {
 940     HEVCLocalContext *lc    = s->HEVClc;
 941     int sao_merge_left_flag = 0;
 942     int sao_merge_up_flag   = 0;
 943     SAOParams *sao          = &CTB(s->sao, rx, ry);
 944     int c_idx, i;
 945
 946     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 947         s->sh.slice_sample_adaptive_offset_flag[1]) {
 948         if (rx > 0) {
 949             if (lc->ctb_left_flag)
 950                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 951         }
 952         if (ry > 0 && !sao_merge_left_flag) {
 953             if (lc->ctb_up_flag)
 954                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 955         }
 956     }
 957
 958     for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
 959         int log2_sao_offset_scale = c_idx == 0 ? s->ps.pps->log2_sao_offset_scale_luma :
 960                                                  s->ps.pps->log2_sao_offset_scale_chroma;
 961
 962         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 963             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 964             continue;
 965         }
 966
 967         if (c_idx == 2) {
 968             sao->type_idx[2] = sao->type_idx[1];
 969             sao->eo_class[2] = sao->eo_class[1];
 970         } else {
 971             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 972         }
 973
 974         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 975             continue;
 976
 977         for (i = 0; i < 4; i++)
 978             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 979
 980         if (sao->type_idx[c_idx] == SAO_BAND) {
 981             for (i = 0; i < 4; i++) {
 982                 if (sao->offset_abs[c_idx][i]) {
 983                     SET_SAO(offset_sign[c_idx][i],
 984                             ff_hevc_sao_offset_sign_decode(s));
 985                 } else {
 986                     sao->offset_sign[c_idx][i] = 0;
 987                 }
 988             }
 989             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 990         } else if (c_idx != 2) {
 991             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 992         }
 993
 994         // Inferred parameters
 995         sao->offset_val[c_idx][0] = 0;
 996         for (i = 0; i < 4; i++) {
 997             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
 998             if (sao->type_idx[c_idx] == SAO_EDGE) {
 999                 if (i > 1)
1000                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
1001             } else if (sao->offset_sign[c_idx][i]) {
1002                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
1003             }
1004             sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale;
1005         }
1006     }
1007 }
1008
1009 #undef SET_SAO
1010 #undef CTB
1011
1012 static int hls_cross_component_pred(HEVCContext *s, int idx) {
1013     HEVCLocalContext *lc    = s->HEVClc;
1014     int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
1015
1016     if (log2_res_scale_abs_plus1 !=  0) {
1017         int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
1018         lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
1019                                (1 - 2 * res_scale_sign_flag);
1020     } else {
1021         lc->tu.res_scale_val = 0;
1022     }
1023
1024
1025     return 0;
1026 }
1027
1028 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1029                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1030                               int log2_cb_size, int log2_trafo_size,
1031                               int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
1032 {
1033     HEVCLocalContext *lc = s->HEVClc;
1034     const int log2_trafo_size_c = log2_trafo_size - s->ps.sps->hshift[1];
1035     int i;
1036
1037     if (lc->cu.pred_mode == MODE_INTRA) {
1038         int trafo_size = 1 << log2_trafo_size;
1039         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1040
1041         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1042     }
1043
1044     if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
1045         (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1046         int scan_idx   = SCAN_DIAG;
1047         int scan_idx_c = SCAN_DIAG;
1048         int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
1049                          (s->ps.sps->chroma_format_idc == 2 &&
1050                          (cbf_cb[1] || cbf_cr[1]));
1051
1052         if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1053             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1054             if (lc->tu.cu_qp_delta != 0)
1055                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1056                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1057             lc->tu.is_cu_qp_delta_coded = 1;
1058
1059             if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
1060                 lc->tu.cu_qp_delta >  (25 + s->ps.sps->qp_bd_offset / 2)) {
1061                 av_log(s->avctx, AV_LOG_ERROR,
1062                        "The cu_qp_delta %d is outside the valid range "
1063                        "[%d, %d].\n",
1064                        lc->tu.cu_qp_delta,
1065                        -(26 + s->ps.sps->qp_bd_offset / 2),
1066                         (25 + s->ps.sps->qp_bd_offset / 2));
1067                 return AVERROR_INVALIDDATA;
1068             }
1069
1070             ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
1071         }
1072
1073         if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
1074             !lc->cu.cu_transquant_bypass_flag  &&  !lc->tu.is_cu_chroma_qp_offset_coded) {
1075             int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
1076             if (cu_chroma_qp_offset_flag) {
1077                 int cu_chroma_qp_offset_idx  = 0;
1078                 if (s->ps.pps->chroma_qp_offset_list_len_minus1 > 0) {
1079                     cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
1080                     av_log(s->avctx, AV_LOG_ERROR,
1081                         "cu_chroma_qp_offset_idx not yet tested.\n");
1082                 }
1083                 lc->tu.cu_qp_offset_cb = s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
1084                 lc->tu.cu_qp_offset_cr = s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
1085             } else {
1086                 lc->tu.cu_qp_offset_cb = 0;
1087                 lc->tu.cu_qp_offset_cr = 0;
1088             }
1089             lc->tu.is_cu_chroma_qp_offset_coded = 1;
1090         }
1091
1092         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1093             if (lc->tu.intra_pred_mode >= 6 &&
1094                 lc->tu.intra_pred_mode <= 14) {
1095                 scan_idx = SCAN_VERT;
1096             } else if (lc->tu.intra_pred_mode >= 22 &&
1097                        lc->tu.intra_pred_mode <= 30) {
1098                 scan_idx = SCAN_HORIZ;
1099             }
1100
1101             if (lc->tu.intra_pred_mode_c >=  6 &&
1102                 lc->tu.intra_pred_mode_c <= 14) {
1103                 scan_idx_c = SCAN_VERT;
1104             } else if (lc->tu.intra_pred_mode_c >= 22 &&
1105                        lc->tu.intra_pred_mode_c <= 30) {
1106                 scan_idx_c = SCAN_HORIZ;
1107             }
1108         }
1109
1110         lc->tu.cross_pf = 0;
1111
1112         if (cbf_luma)
1113             ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1114         if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1115             int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1116             int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1117             lc->tu.cross_pf  = (s->ps.pps->cross_component_prediction_enabled_flag && cbf_luma &&
1118                                 (lc->cu.pred_mode == MODE_INTER ||
1119                                  (lc->tu.chroma_mode_c ==  4)));
1120
1121             if (lc->tu.cross_pf) {
1122                 hls_cross_component_pred(s, 0);
1123             }
1124             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1125                 if (lc->cu.pred_mode == MODE_INTRA) {
1126                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1127                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
1128                 }
1129                 if (cbf_cb[i])
1130                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1131                                                 log2_trafo_size_c, scan_idx_c, 1);
1132                 else
1133                     if (lc->tu.cross_pf) {
1134                         ptrdiff_t stride = s->frame->linesize[1];
1135                         int hshift = s->ps.sps->hshift[1];
1136                         int vshift = s->ps.sps->vshift[1];
1137                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1138                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1139                         int size = 1 << log2_trafo_size_c;
1140
1141                         uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1142                                                               ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1143                         for (i = 0; i < (size * size); i++) {
1144                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1145                         }
1146                         s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
1147                     }
1148             }
1149
1150             if (lc->tu.cross_pf) {
1151                 hls_cross_component_pred(s, 1);
1152             }
1153             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1154                 if (lc->cu.pred_mode == MODE_INTRA) {
1155                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1156                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1157                 }
1158                 if (cbf_cr[i])
1159                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1160                                                 log2_trafo_size_c, scan_idx_c, 2);
1161                 else
1162                     if (lc->tu.cross_pf) {
1163                         ptrdiff_t stride = s->frame->linesize[2];
1164                         int hshift = s->ps.sps->hshift[2];
1165                         int vshift = s->ps.sps->vshift[2];
1166                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1167                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1168                         int size = 1 << log2_trafo_size_c;
1169
1170                         uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1171                                                           ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1172                         for (i = 0; i < (size * size); i++) {
1173                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1174                         }
1175                         s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
1176                     }
1177             }
1178         } else if (s->ps.sps->chroma_format_idc && blk_idx == 3) {
1179             int trafo_size_h = 1 << (log2_trafo_size + 1);
1180             int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1181             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1182                 if (lc->cu.pred_mode == MODE_INTRA) {
1183                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1184                                                     trafo_size_h, trafo_size_v);
1185                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1186                 }
1187                 if (cbf_cb[i])
1188                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1189                                                 log2_trafo_size, scan_idx_c, 1);
1190             }
1191             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1192                 if (lc->cu.pred_mode == MODE_INTRA) {
1193                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1194                                                 trafo_size_h, trafo_size_v);
1195                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1196                 }
1197                 if (cbf_cr[i])
1198                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1199                                                 log2_trafo_size, scan_idx_c, 2);
1200             }
1201         }
1202     } else if (s->ps.sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
1203         if (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3) {
1204             int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1205             int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1206             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1207             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1208             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1209             if (s->ps.sps->chroma_format_idc == 2) {
1210                 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1211                                                 trafo_size_h, trafo_size_v);
1212                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1213                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1214             }
1215         } else if (blk_idx == 3) {
1216             int trafo_size_h = 1 << (log2_trafo_size + 1);
1217             int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1218             ff_hevc_set_neighbour_available(s, xBase, yBase,
1219                                             trafo_size_h, trafo_size_v);
1220             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1221             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1222             if (s->ps.sps->chroma_format_idc == 2) {
1223                 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1224                                                 trafo_size_h, trafo_size_v);
1225                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1226                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
1227             }
1228         }
1229     }
1230
1231     return 0;
1232 }
1233
1234 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1235 {
1236     int cb_size          = 1 << log2_cb_size;
1237     int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1238
1239     int min_pu_width     = s->ps.sps->min_pu_width;
1240     int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1241     int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1242     int i, j;
1243
1244     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1245         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1246             s->is_pcm[i + j * min_pu_width] = 2;
1247 }
1248
1249 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1250                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1251                               int log2_cb_size, int log2_trafo_size,
1252                               int trafo_depth, int blk_idx,
1253                               const int *base_cbf_cb, const int *base_cbf_cr)
1254 {
1255     HEVCLocalContext *lc = s->HEVClc;
1256     uint8_t split_transform_flag;
1257     int cbf_cb[2];
1258     int cbf_cr[2];
1259     int ret;
1260
1261     cbf_cb[0] = base_cbf_cb[0];
1262     cbf_cb[1] = base_cbf_cb[1];
1263     cbf_cr[0] = base_cbf_cr[0];
1264     cbf_cr[1] = base_cbf_cr[1];
1265
1266     if (lc->cu.intra_split_flag) {
1267         if (trafo_depth == 1) {
1268             lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[blk_idx];
1269             if (s->ps.sps->chroma_format_idc == 3) {
1270                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1271                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[blk_idx];
1272             } else {
1273                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1274                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1275             }
1276         }
1277     } else {
1278         lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[0];
1279         lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1280         lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1281     }
1282
1283     if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1284         log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
1285         trafo_depth     < lc->cu.max_trafo_depth       &&
1286         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1287         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1288     } else {
1289         int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1290                           lc->cu.pred_mode == MODE_INTER &&
1291                           lc->cu.part_mode != PART_2Nx2N &&
1292                           trafo_depth == 0;
1293
1294         split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1295                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1296                                inter_split;
1297     }
1298
1299     if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1300         if (trafo_depth == 0 || cbf_cb[0]) {
1301             cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1302             if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1303                 cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1304             }
1305         }
1306
1307         if (trafo_depth == 0 || cbf_cr[0]) {
1308             cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1309             if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1310                 cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1311             }
1312         }
1313     }
1314
1315     if (split_transform_flag) {
1316         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1317         const int x1 = x0 + trafo_size_split;
1318         const int y1 = y0 + trafo_size_split;
1319
1320 #define SUBDIVIDE(x, y, idx)                                                    \
1321 do {                                                                            \
1322     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1323                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1324                              cbf_cb, cbf_cr);                                   \
1325     if (ret < 0)                                                                \
1326         return ret;                                                             \
1327 } while (0)
1328
1329         SUBDIVIDE(x0, y0, 0);
1330         SUBDIVIDE(x1, y0, 1);
1331         SUBDIVIDE(x0, y1, 2);
1332         SUBDIVIDE(x1, y1, 3);
1333
1334 #undef SUBDIVIDE
1335     } else {
1336         int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
1337         int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1338         int min_tu_width     = s->ps.sps->min_tb_width;
1339         int cbf_luma         = 1;
1340
1341         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1342             cbf_cb[0] || cbf_cr[0] ||
1343             (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1344             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1345         }
1346
1347         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1348                                  log2_cb_size, log2_trafo_size,
1349                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1350         if (ret < 0)
1351             return ret;
1352         // TODO: store cbf_luma somewhere else
1353         if (cbf_luma) {
1354             int i, j;
1355             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1356                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1357                     int x_tu = (x0 + j) >> log2_min_tu_size;
1358                     int y_tu = (y0 + i) >> log2_min_tu_size;
1359                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1360                 }
1361         }
1362         if (!s->sh.disable_deblocking_filter_flag) {
1363             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1364             if (s->ps.pps->transquant_bypass_enable_flag &&
1365                 lc->cu.cu_transquant_bypass_flag)
1366                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1367         }
1368     }
1369     return 0;
1370 }
1371
1372 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1373 {
1374     HEVCLocalContext *lc = s->HEVClc;
1375     GetBitContext gb;
1376     int cb_size   = 1 << log2_cb_size;
1377     ptrdiff_t stride0 = s->frame->linesize[0];
1378     ptrdiff_t stride1 = s->frame->linesize[1];
1379     ptrdiff_t stride2 = s->frame->linesize[2];
1380     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1381     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1382     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
1383
1384     int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth +
1385                          (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) +
1386                           ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) *
1387                           s->ps.sps->pcm.bit_depth_chroma;
1388     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1389     int ret;
1390
1391     if (!s->sh.disable_deblocking_filter_flag)
1392         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1393
1394     ret = init_get_bits(&gb, pcm, length);
1395     if (ret < 0)
1396         return ret;
1397
1398     s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size,     &gb, s->ps.sps->pcm.bit_depth);
1399     if (s->ps.sps->chroma_format_idc) {
1400         s->hevcdsp.put_pcm(dst1, stride1,
1401                            cb_size >> s->ps.sps->hshift[1],
1402                            cb_size >> s->ps.sps->vshift[1],
1403                            &gb, s->ps.sps->pcm.bit_depth_chroma);
1404         s->hevcdsp.put_pcm(dst2, stride2,
1405                            cb_size >> s->ps.sps->hshift[2],
1406                            cb_size >> s->ps.sps->vshift[2],
1407                            &gb, s->ps.sps->pcm.bit_depth_chroma);
1408     }
1409
1410     return 0;
1411 }
1412
1413 /**
1414  * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1415  *
1416  * @param s HEVC decoding context
1417  * @param dst target buffer for block data at block position
1418  * @param dststride stride of the dst buffer
1419  * @param ref reference picture buffer at origin (0, 0)
1420  * @param mv motion vector (relative to block position) to get pixel data from
1421  * @param x_off horizontal position of block from origin (0, 0)
1422  * @param y_off vertical position of block from origin (0, 0)
1423  * @param block_w width of block
1424  * @param block_h height of block
1425  * @param luma_weight weighting factor applied to the luma prediction
1426  * @param luma_offset additive offset applied to the luma prediction value
1427  */
1428
1429 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1430                         AVFrame *ref, const Mv *mv, int x_off, int y_off,
1431                         int block_w, int block_h, int luma_weight, int luma_offset)
1432 {
1433     HEVCLocalContext *lc = s->HEVClc;
1434     uint8_t *src         = ref->data[0];
1435     ptrdiff_t srcstride  = ref->linesize[0];
1436     int pic_width        = s->ps.sps->width;
1437     int pic_height       = s->ps.sps->height;
1438     int mx               = mv->x & 3;
1439     int my               = mv->y & 3;
1440     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1441                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1442     int idx              = ff_hevc_pel_weight[block_w];
1443
1444     x_off += mv->x >> 2;
1445     y_off += mv->y >> 2;
1446     src   += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1447
1448     if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1449         x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1450         y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1451         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1452         int offset     = QPEL_EXTRA_BEFORE * srcstride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1453         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1454
1455         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1456                                  edge_emu_stride, srcstride,
1457                                  block_w + QPEL_EXTRA,
1458                                  block_h + QPEL_EXTRA,
1459                                  x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1460                                  pic_width, pic_height);
1461         src = lc->edge_emu_buffer + buf_offset;
1462         srcstride = edge_emu_stride;
1463     }
1464
1465     if (!weight_flag)
1466         s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1467                                                       block_h, mx, my, block_w);
1468     else
1469         s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1470                                                         block_h, s->sh.luma_log2_weight_denom,
1471                                                         luma_weight, luma_offset, mx, my, block_w);
1472 }
1473
1474 /**
1475  * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1476  *
1477  * @param s HEVC decoding context
1478  * @param dst target buffer for block data at block position
1479  * @param dststride stride of the dst buffer
1480  * @param ref0 reference picture0 buffer at origin (0, 0)
1481  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1482  * @param x_off horizontal position of block from origin (0, 0)
1483  * @param y_off vertical position of block from origin (0, 0)
1484  * @param block_w width of block
1485  * @param block_h height of block
1486  * @param ref1 reference picture1 buffer at origin (0, 0)
1487  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1488  * @param current_mv current motion vector structure
1489  */
1490  static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1491                        AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1492                        int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1493 {
1494     HEVCLocalContext *lc = s->HEVClc;
1495     ptrdiff_t src0stride  = ref0->linesize[0];
1496     ptrdiff_t src1stride  = ref1->linesize[0];
1497     int pic_width        = s->ps.sps->width;
1498     int pic_height       = s->ps.sps->height;
1499     int mx0              = mv0->x & 3;
1500     int my0              = mv0->y & 3;
1501     int mx1              = mv1->x & 3;
1502     int my1              = mv1->y & 3;
1503     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1504                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1505     int x_off0           = x_off + (mv0->x >> 2);
1506     int y_off0           = y_off + (mv0->y >> 2);
1507     int x_off1           = x_off + (mv1->x >> 2);
1508     int y_off1           = y_off + (mv1->y >> 2);
1509     int idx              = ff_hevc_pel_weight[block_w];
1510
1511     uint8_t *src0  = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1512     uint8_t *src1  = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1513
1514     if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1515         x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1516         y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1517         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1518         int offset     = QPEL_EXTRA_BEFORE * src0stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1519         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1520
1521         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1522                                  edge_emu_stride, src0stride,
1523                                  block_w + QPEL_EXTRA,
1524                                  block_h + QPEL_EXTRA,
1525                                  x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1526                                  pic_width, pic_height);
1527         src0 = lc->edge_emu_buffer + buf_offset;
1528         src0stride = edge_emu_stride;
1529     }
1530
1531     if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1532         x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1533         y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1534         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1535         int offset     = QPEL_EXTRA_BEFORE * src1stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1536         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1537
1538         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1539                                  edge_emu_stride, src1stride,
1540                                  block_w + QPEL_EXTRA,
1541                                  block_h + QPEL_EXTRA,
1542                                  x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1543                                  pic_width, pic_height);
1544         src1 = lc->edge_emu_buffer2 + buf_offset;
1545         src1stride = edge_emu_stride;
1546     }
1547
1548     s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
1549                                                 block_h, mx0, my0, block_w);
1550     if (!weight_flag)
1551         s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1552                                                        block_h, mx1, my1, block_w);
1553     else
1554         s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1555                                                          block_h, s->sh.luma_log2_weight_denom,
1556                                                          s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1557                                                          s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1558                                                          s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1559                                                          s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1560                                                          mx1, my1, block_w);
1561
1562 }
1563
1564 /**
1565  * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1566  *
1567  * @param s HEVC decoding context
1568  * @param dst1 target buffer for block data at block position (U plane)
1569  * @param dst2 target buffer for block data at block position (V plane)
1570  * @param dststride stride of the dst1 and dst2 buffers
1571  * @param ref reference picture buffer at origin (0, 0)
1572  * @param mv motion vector (relative to block position) to get pixel data from
1573  * @param x_off horizontal position of block from origin (0, 0)
1574  * @param y_off vertical position of block from origin (0, 0)
1575  * @param block_w width of block
1576  * @param block_h height of block
1577  * @param chroma_weight weighting factor applied to the chroma prediction
1578  * @param chroma_offset additive offset applied to the chroma prediction value
1579  */
1580
1581 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1582                           ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1583                           int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1584 {
1585     HEVCLocalContext *lc = s->HEVClc;
1586     int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
1587     int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
1588     const Mv *mv         = &current_mv->mv[reflist];
1589     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1590                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1591     int idx              = ff_hevc_pel_weight[block_w];
1592     int hshift           = s->ps.sps->hshift[1];
1593     int vshift           = s->ps.sps->vshift[1];
1594     intptr_t mx          = av_mod_uintp2(mv->x, 2 + hshift);
1595     intptr_t my          = av_mod_uintp2(mv->y, 2 + vshift);
1596     intptr_t _mx         = mx << (1 - hshift);
1597     intptr_t _my         = my << (1 - vshift);
1598
1599     x_off += mv->x >> (2 + hshift);
1600     y_off += mv->y >> (2 + vshift);
1601     src0  += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1602
1603     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1604         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1605         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1606         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1607         int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->ps.sps->pixel_shift));
1608         int buf_offset0 = EPEL_EXTRA_BEFORE *
1609                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1610         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1611                                  edge_emu_stride, srcstride,
1612                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1613                                  x_off - EPEL_EXTRA_BEFORE,
1614                                  y_off - EPEL_EXTRA_BEFORE,
1615                                  pic_width, pic_height);
1616
1617         src0 = lc->edge_emu_buffer + buf_offset0;
1618         srcstride = edge_emu_stride;
1619     }
1620     if (!weight_flag)
1621         s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1622                                                   block_h, _mx, _my, block_w);
1623     else
1624         s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1625                                                         block_h, s->sh.chroma_log2_weight_denom,
1626                                                         chroma_weight, chroma_offset, _mx, _my, block_w);
1627 }
1628
1629 /**
1630  * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1631  *
1632  * @param s HEVC decoding context
1633  * @param dst target buffer for block data at block position
1634  * @param dststride stride of the dst buffer
1635  * @param ref0 reference picture0 buffer at origin (0, 0)
1636  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1637  * @param x_off horizontal position of block from origin (0, 0)
1638  * @param y_off vertical position of block from origin (0, 0)
1639  * @param block_w width of block
1640  * @param block_h height of block
1641  * @param ref1 reference picture1 buffer at origin (0, 0)
1642  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1643  * @param current_mv current motion vector structure
1644  * @param cidx chroma component(cb, cr)
1645  */
1646 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1647                          int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1648 {
1649     HEVCLocalContext *lc = s->HEVClc;
1650     uint8_t *src1        = ref0->data[cidx+1];
1651     uint8_t *src2        = ref1->data[cidx+1];
1652     ptrdiff_t src1stride = ref0->linesize[cidx+1];
1653     ptrdiff_t src2stride = ref1->linesize[cidx+1];
1654     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1655                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1656     int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
1657     int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
1658     Mv *mv0              = &current_mv->mv[0];
1659     Mv *mv1              = &current_mv->mv[1];
1660     int hshift = s->ps.sps->hshift[1];
1661     int vshift = s->ps.sps->vshift[1];
1662
1663     intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift);
1664     intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift);
1665     intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift);
1666     intptr_t my1 = av_mod_uintp2(mv1->y, 2 + vshift);
1667     intptr_t _mx0 = mx0 << (1 - hshift);
1668     intptr_t _my0 = my0 << (1 - vshift);
1669     intptr_t _mx1 = mx1 << (1 - hshift);
1670     intptr_t _my1 = my1 << (1 - vshift);
1671
1672     int x_off0 = x_off + (mv0->x >> (2 + hshift));
1673     int y_off0 = y_off + (mv0->y >> (2 + vshift));
1674     int x_off1 = x_off + (mv1->x >> (2 + hshift));
1675     int y_off1 = y_off + (mv1->y >> (2 + vshift));
1676     int idx = ff_hevc_pel_weight[block_w];
1677     src1  += y_off0 * src1stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1678     src2  += y_off1 * src2stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1679
1680     if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1681         x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1682         y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1683         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1684         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1685         int buf_offset1 = EPEL_EXTRA_BEFORE *
1686                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1687
1688         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1689                                  edge_emu_stride, src1stride,
1690                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1691                                  x_off0 - EPEL_EXTRA_BEFORE,
1692                                  y_off0 - EPEL_EXTRA_BEFORE,
1693                                  pic_width, pic_height);
1694
1695         src1 = lc->edge_emu_buffer + buf_offset1;
1696         src1stride = edge_emu_stride;
1697     }
1698
1699     if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1700         x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1701         y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1702         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1703         int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1704         int buf_offset1 = EPEL_EXTRA_BEFORE *
1705                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1706
1707         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1708                                  edge_emu_stride, src2stride,
1709                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1710                                  x_off1 - EPEL_EXTRA_BEFORE,
1711                                  y_off1 - EPEL_EXTRA_BEFORE,
1712                                  pic_width, pic_height);
1713
1714         src2 = lc->edge_emu_buffer2 + buf_offset1;
1715         src2stride = edge_emu_stride;
1716     }
1717
1718     s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1719                                                 block_h, _mx0, _my0, block_w);
1720     if (!weight_flag)
1721         s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1722                                                        src2, src2stride, lc->tmp,
1723                                                        block_h, _mx1, _my1, block_w);
1724     else
1725         s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1726                                                          src2, src2stride, lc->tmp,
1727                                                          block_h,
1728                                                          s->sh.chroma_log2_weight_denom,
1729                                                          s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1730                                                          s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1731                                                          s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1732                                                          s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1733                                                          _mx1, _my1, block_w);
1734 }
1735
1736 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1737                                 const Mv *mv, int y0, int height)
1738 {
1739     if (s->threads_type == FF_THREAD_FRAME ) {
1740         int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1741
1742         ff_thread_await_progress(&ref->tf, y, 0);
1743     }
1744 }
1745
1746 static void hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
1747                                   int nPbH, int log2_cb_size, int part_idx,
1748                                   int merge_idx, MvField *mv)
1749 {
1750     HEVCLocalContext *lc = s->HEVClc;
1751     enum InterPredIdc inter_pred_idc = PRED_L0;
1752     int mvp_flag;
1753
1754     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1755     mv->pred_flag = 0;
1756     if (s->sh.slice_type == HEVC_SLICE_B)
1757         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1758
1759     if (inter_pred_idc != PRED_L1) {
1760         if (s->sh.nb_refs[L0])
1761             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1762
1763         mv->pred_flag = PF_L0;
1764         ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1765         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1766         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1767                                  part_idx, merge_idx, mv, mvp_flag, 0);
1768         mv->mv[0].x += lc->pu.mvd.x;
1769         mv->mv[0].y += lc->pu.mvd.y;
1770     }
1771
1772     if (inter_pred_idc != PRED_L0) {
1773         if (s->sh.nb_refs[L1])
1774             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1775
1776         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1777             AV_ZERO32(&lc->pu.mvd);
1778         } else {
1779             ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1780         }
1781
1782         mv->pred_flag += PF_L1;
1783         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1784         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1785                                  part_idx, merge_idx, mv, mvp_flag, 1);
1786         mv->mv[1].x += lc->pu.mvd.x;
1787         mv->mv[1].y += lc->pu.mvd.y;
1788     }
1789 }
1790
1791 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1792                                 int nPbW, int nPbH,
1793                                 int log2_cb_size, int partIdx, int idx)
1794 {
1795 #define POS(c_idx, x, y)                                                              \
1796     &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1797                            (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1798     HEVCLocalContext *lc = s->HEVClc;
1799     int merge_idx = 0;
1800     struct MvField current_mv = {{{ 0 }}};
1801
1802     int min_pu_width = s->ps.sps->min_pu_width;
1803
1804     MvField *tab_mvf = s->ref->tab_mvf;
1805     RefPicList  *refPicList = s->ref->refPicList;
1806     HEVCFrame *ref0 = NULL, *ref1 = NULL;
1807     uint8_t *dst0 = POS(0, x0, y0);
1808     uint8_t *dst1 = POS(1, x0, y0);
1809     uint8_t *dst2 = POS(2, x0, y0);
1810     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1811     int min_cb_width     = s->ps.sps->min_cb_width;
1812     int x_cb             = x0 >> log2_min_cb_size;
1813     int y_cb             = y0 >> log2_min_cb_size;
1814     int x_pu, y_pu;
1815     int i, j;
1816
1817     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1818
1819     if (!skip_flag)
1820         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1821
1822     if (skip_flag || lc->pu.merge_flag) {
1823         if (s->sh.max_num_merge_cand > 1)
1824             merge_idx = ff_hevc_merge_idx_decode(s);
1825         else
1826             merge_idx = 0;
1827
1828         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1829                                    partIdx, merge_idx, &current_mv);
1830     } else {
1831         hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1832                               partIdx, merge_idx, &current_mv);
1833     }
1834
1835     x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1836     y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1837
1838     for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1839         for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1840             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1841
1842     if (current_mv.pred_flag & PF_L0) {
1843         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1844         if (!ref0)
1845             return;
1846         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1847     }
1848     if (current_mv.pred_flag & PF_L1) {
1849         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1850         if (!ref1)
1851             return;
1852         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1853     }
1854
1855     if (current_mv.pred_flag == PF_L0) {
1856         int x0_c = x0 >> s->ps.sps->hshift[1];
1857         int y0_c = y0 >> s->ps.sps->vshift[1];
1858         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1859         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1860
1861         luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1862                     &current_mv.mv[0], x0, y0, nPbW, nPbH,
1863                     s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1864                     s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1865
1866         if (s->ps.sps->chroma_format_idc) {
1867             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1868                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1869                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1870             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1871                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1872                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1873         }
1874     } else if (current_mv.pred_flag == PF_L1) {
1875         int x0_c = x0 >> s->ps.sps->hshift[1];
1876         int y0_c = y0 >> s->ps.sps->vshift[1];
1877         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1878         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1879
1880         luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1881                     &current_mv.mv[1], x0, y0, nPbW, nPbH,
1882                     s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1883                     s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1884
1885         if (s->ps.sps->chroma_format_idc) {
1886             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1887                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1888                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1889
1890             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1891                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1892                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1893         }
1894     } else if (current_mv.pred_flag == PF_BI) {
1895         int x0_c = x0 >> s->ps.sps->hshift[1];
1896         int y0_c = y0 >> s->ps.sps->vshift[1];
1897         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1898         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1899
1900         luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1901                    &current_mv.mv[0], x0, y0, nPbW, nPbH,
1902                    ref1->frame, &current_mv.mv[1], &current_mv);
1903
1904         if (s->ps.sps->chroma_format_idc) {
1905             chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1906                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0);
1907
1908             chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1909                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 1);
1910         }
1911     }
1912 }
1913
1914 /**
1915  * 8.4.1
1916  */
1917 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1918                                 int prev_intra_luma_pred_flag)
1919 {
1920     HEVCLocalContext *lc = s->HEVClc;
1921     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
1922     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
1923     int min_pu_width     = s->ps.sps->min_pu_width;
1924     int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
1925     int x0b              = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
1926     int y0b              = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
1927
1928     int cand_up   = (lc->ctb_up_flag || y0b) ?
1929                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1930     int cand_left = (lc->ctb_left_flag || x0b) ?
1931                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1932
1933     int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1934
1935     MvField *tab_mvf = s->ref->tab_mvf;
1936     int intra_pred_mode;
1937     int candidate[3];
1938     int i, j;
1939
1940     // intra_pred_mode prediction does not cross vertical CTB boundaries
1941     if ((y0 - 1) < y_ctb)
1942         cand_up = INTRA_DC;
1943
1944     if (cand_left == cand_up) {
1945         if (cand_left < 2) {
1946             candidate[0] = INTRA_PLANAR;
1947             candidate[1] = INTRA_DC;
1948             candidate[2] = INTRA_ANGULAR_26;
1949         } else {
1950             candidate[0] = cand_left;
1951             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1952             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1953         }
1954     } else {
1955         candidate[0] = cand_left;
1956         candidate[1] = cand_up;
1957         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1958             candidate[2] = INTRA_PLANAR;
1959         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1960             candidate[2] = INTRA_DC;
1961         } else {
1962             candidate[2] = INTRA_ANGULAR_26;
1963         }
1964     }
1965
1966     if (prev_intra_luma_pred_flag) {
1967         intra_pred_mode = candidate[lc->pu.mpm_idx];
1968     } else {
1969         if (candidate[0] > candidate[1])
1970             FFSWAP(uint8_t, candidate[0], candidate[1]);
1971         if (candidate[0] > candidate[2])
1972             FFSWAP(uint8_t, candidate[0], candidate[2]);
1973         if (candidate[1] > candidate[2])
1974             FFSWAP(uint8_t, candidate[1], candidate[2]);
1975
1976         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1977         for (i = 0; i < 3; i++)
1978             if (intra_pred_mode >= candidate[i])
1979                 intra_pred_mode++;
1980     }
1981
1982     /* write the intra prediction units into the mv array */
1983     if (!size_in_pus)
1984         size_in_pus = 1;
1985     for (i = 0; i < size_in_pus; i++) {
1986         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1987                intra_pred_mode, size_in_pus);
1988
1989         for (j = 0; j < size_in_pus; j++) {
1990             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
1991         }
1992     }
1993
1994     return intra_pred_mode;
1995 }
1996
1997 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1998                                           int log2_cb_size, int ct_depth)
1999 {
2000     int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
2001     int x_cb   = x0 >> s->ps.sps->log2_min_cb_size;
2002     int y_cb   = y0 >> s->ps.sps->log2_min_cb_size;
2003     int y;
2004
2005     for (y = 0; y < length; y++)
2006         memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
2007                ct_depth, length);
2008 }
2009
/*
 * 35-entry remapping table with output values in the range 0..31.
 * NOTE(review): the consumer is not visible next to this table; presumably
 * it is indexed by an intra prediction mode (0..34) when remapping chroma
 * modes for 4:2:2 content. Confirm against the user of the table.
 * The trailing comments give the index range of each row.
 */
static const uint8_t tab_mode_idx[] = {
     0,  1,  2,  2,  2,  2,  3, /*  0 ..  6 */
     5,  7,  8, 10, 12, 13, 15, /*  7 .. 13 */
    17, 18, 19, 20, 21, 22, 23, /* 14 .. 20 */
    23, 24, 24, 25, 25, 26, 27, /* 21 .. 27 */
    27, 28, 28, 29, 29, 30, 31, /* 28 .. 34 */
};
2013
2014 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
2015                                   int log2_cb_size)
2016 {
2017     HEVCLocalContext *lc = s->HEVClc;
2018     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
2019     uint8_t prev_intra_luma_pred_flag[4];
2020     int split   = lc->cu.part_mode == PART_NxN;
2021     int pb_size = (1 << log2_cb_size) >> split;
2022     int side    = split + 1;
2023     int chroma_mode;
2024     int i, j;
2025
2026     for (i = 0; i < side; i++)
2027         for (j = 0; j < side; j++)
2028             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
2029
2030     for (i = 0; i < side; i++) {
2031         for (j = 0; j < side; j++) {
2032             if (prev_intra_luma_pred_flag[2 * i + j])
2033                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2034             else
2035                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2036
2037             lc->pu.intra_pred_mode[2 * i + j] =
2038                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2039                                      prev_intra_luma_pred_flag[2 * i + j]);
2040         }
2041     }
2042
2043     if (s->ps.sps->chroma_format_idc == 3) {
2044         for (i = 0; i < side; i++) {
2045             for (j = 0; j < side; j++) {
2046                 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2047                 if (chroma_mode != 4) {
2048                     if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
2049                         lc->pu.intra_pred_mode_c[2 * i + j] = 34;
2050                     else
2051                         lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
2052                 } else {
2053                     lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
2054                 }
2055             }
2056         }
2057     } else if (s->ps.sps->chroma_format_idc == 2) {
2058         int mode_idx;
2059         lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2060         if (chroma_mode != 4) {
2061             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2062                 mode_idx = 34;
2063             else
2064                 mode_idx = intra_chroma_table[chroma_mode];
2065         } else {
2066             mode_idx = lc->pu.intra_pred_mode[0];
2067         }
2068         lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
2069     } else if (s->ps.sps->chroma_format_idc != 0) {
2070         chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2071         if (chroma_mode != 4) {
2072             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2073                 lc->pu.intra_pred_mode_c[0] = 34;
2074             else
2075                 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
2076         } else {
2077             lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
2078         }
2079     }
2080 }
2081
2082 static void intra_prediction_unit_default_value(HEVCContext *s,
2083                                                 int x0, int y0,
2084                                                 int log2_cb_size)
2085 {
2086     HEVCLocalContext *lc = s->HEVClc;
2087     int pb_size          = 1 << log2_cb_size;
2088     int size_in_pus      = pb_size >> s->ps.sps->log2_min_pu_size;
2089     int min_pu_width     = s->ps.sps->min_pu_width;
2090     MvField *tab_mvf     = s->ref->tab_mvf;
2091     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
2092     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
2093     int j, k;
2094
2095     if (size_in_pus == 0)
2096         size_in_pus = 1;
2097     for (j = 0; j < size_in_pus; j++)
2098         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2099     if (lc->cu.pred_mode == MODE_INTRA)
2100         for (j = 0; j < size_in_pus; j++)
2101             for (k = 0; k < size_in_pus; k++)
2102                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
2103 }
2104
2105 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2106 {
2107     int cb_size          = 1 << log2_cb_size;
2108     HEVCLocalContext *lc = s->HEVClc;
2109     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
2110     int length           = cb_size >> log2_min_cb_size;
2111     int min_cb_width     = s->ps.sps->min_cb_width;
2112     int x_cb             = x0 >> log2_min_cb_size;
2113     int y_cb             = y0 >> log2_min_cb_size;
2114     int idx              = log2_cb_size - 2;
2115     int qp_block_mask    = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2116     int x, y, ret;
2117
2118     lc->cu.x                = x0;
2119     lc->cu.y                = y0;
2120     lc->cu.pred_mode        = MODE_INTRA;
2121     lc->cu.part_mode        = PART_2Nx2N;
2122     lc->cu.intra_split_flag = 0;
2123
2124     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2125     for (x = 0; x < 4; x++)
2126         lc->pu.intra_pred_mode[x] = 1;
2127     if (s->ps.pps->transquant_bypass_enable_flag) {
2128         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2129         if (lc->cu.cu_transquant_bypass_flag)
2130             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2131     } else
2132         lc->cu.cu_transquant_bypass_flag = 0;
2133
2134     if (s->sh.slice_type != HEVC_SLICE_I) {
2135         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2136
2137         x = y_cb * min_cb_width + x_cb;
2138         for (y = 0; y < length; y++) {
2139             memset(&s->skip_flag[x], skip_flag, length);
2140             x += min_cb_width;
2141         }
2142         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2143     } else {
2144         x = y_cb * min_cb_width + x_cb;
2145         for (y = 0; y < length; y++) {
2146             memset(&s->skip_flag[x], 0, length);
2147             x += min_cb_width;
2148         }
2149     }
2150
2151     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2152         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2153         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2154
2155         if (!s->sh.disable_deblocking_filter_flag)
2156             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2157     } else {
2158         int pcm_flag = 0;
2159
2160         if (s->sh.slice_type != HEVC_SLICE_I)
2161             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2162         if (lc->cu.pred_mode != MODE_INTRA ||
2163             log2_cb_size == s->ps.sps->log2_min_cb_size) {
2164             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2165             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2166                                       lc->cu.pred_mode == MODE_INTRA;
2167         }
2168
2169         if (lc->cu.pred_mode == MODE_INTRA) {
2170             if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2171                 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2172                 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2173                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2174             }
2175             if (pcm_flag) {
2176                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2177                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2178                 if (s->ps.sps->pcm.loop_filter_disable_flag)
2179                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2180
2181                 if (ret < 0)
2182                     return ret;
2183             } else {
2184                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2185             }
2186         } else {
2187             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2188             switch (lc->cu.part_mode) {
2189             case PART_2Nx2N:
2190                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2191                 break;
2192             case PART_2NxN:
2193                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0, idx);
2194                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2195                 break;
2196             case PART_Nx2N:
2197                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2198                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2199                 break;
2200             case PART_2NxnU:
2201                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0, idx);
2202                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2203                 break;
2204             case PART_2NxnD:
2205                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2206                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1, idx);
2207                 break;
2208             case PART_nLx2N:
2209                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0, idx - 2);
2210                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2211                 break;
2212             case PART_nRx2N:
2213                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2214                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1, idx - 2);
2215                 break;
2216             case PART_NxN:
2217                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2218                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2219                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2220                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
2221                 break;
2222             }
2223         }
2224
2225         if (!pcm_flag) {
2226             int rqt_root_cbf = 1;
2227
2228             if (lc->cu.pred_mode != MODE_INTRA &&
2229                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2230                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2231             }
2232             if (rqt_root_cbf) {
2233                 const static int cbf[2] = { 0 };
2234                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2235                                          s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2236                                          s->ps.sps->max_transform_hierarchy_depth_inter;
2237                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2238                                          log2_cb_size,
2239                                          log2_cb_size, 0, 0, cbf, cbf);
2240                 if (ret < 0)
2241                     return ret;
2242             } else {
2243                 if (!s->sh.disable_deblocking_filter_flag)
2244                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2245             }
2246         }
2247     }
2248
2249     if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2250         ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
2251
2252     x = y_cb * min_cb_width + x_cb;
2253     for (y = 0; y < length; y++) {
2254         memset(&s->qp_y_tab[x], lc->qp_y, length);
2255         x += min_cb_width;
2256     }
2257
2258     if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2259        ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2260         lc->qPy_pred = lc->qp_y;
2261     }
2262
2263     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
2264
2265     return 0;
2266 }
2267
2268 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2269                                int log2_cb_size, int cb_depth)
2270 {
2271     HEVCLocalContext *lc = s->HEVClc;
2272     const int cb_size    = 1 << log2_cb_size;
2273     int ret;
2274     int split_cu;
2275
2276     lc->ct_depth = cb_depth;
2277     if (x0 + cb_size <= s->ps.sps->width  &&
2278         y0 + cb_size <= s->ps.sps->height &&
2279         log2_cb_size > s->ps.sps->log2_min_cb_size) {
2280         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2281     } else {
2282         split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
2283     }
2284     if (s->ps.pps->cu_qp_delta_enabled_flag &&
2285         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2286         lc->tu.is_cu_qp_delta_coded = 0;
2287         lc->tu.cu_qp_delta          = 0;
2288     }
2289
2290     if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2291         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_chroma_qp_offset_depth) {
2292         lc->tu.is_cu_chroma_qp_offset_coded = 0;
2293     }
2294
2295     if (split_cu) {
2296         int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2297         const int cb_size_split = cb_size >> 1;
2298         const int x1 = x0 + cb_size_split;
2299         const int y1 = y0 + cb_size_split;
2300
2301         int more_data = 0;
2302
2303         more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2304         if (more_data < 0)
2305             return more_data;
2306
2307         if (more_data && x1 < s->ps.sps->width) {
2308             more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2309             if (more_data < 0)
2310                 return more_data;
2311         }
2312         if (more_data && y1 < s->ps.sps->height) {
2313             more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2314             if (more_data < 0)
2315                 return more_data;
2316         }
2317         if (more_data && x1 < s->ps.sps->width &&
2318             y1 < s->ps.sps->height) {
2319             more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
2320             if (more_data < 0)
2321                 return more_data;
2322         }
2323
2324         if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2325             ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2326             lc->qPy_pred = lc->qp_y;
2327
2328         if (more_data)
2329             return ((x1 + cb_size_split) < s->ps.sps->width ||
2330                     (y1 + cb_size_split) < s->ps.sps->height);
2331         else
2332             return 0;
2333     } else {
2334         ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2335         if (ret < 0)
2336             return ret;
2337         if ((!((x0 + cb_size) %
2338                (1 << (s->ps.sps->log2_ctb_size))) ||
2339              (x0 + cb_size >= s->ps.sps->width)) &&
2340             (!((y0 + cb_size) %
2341                (1 << (s->ps.sps->log2_ctb_size))) ||
2342              (y0 + cb_size >= s->ps.sps->height))) {
2343             int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2344             return !end_of_slice_flag;
2345         } else {
2346             return 1;
2347         }
2348     }
2349
2350     return 0;
2351 }
2352
2353 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2354                                  int ctb_addr_ts)
2355 {
2356     HEVCLocalContext *lc  = s->HEVClc;
2357     int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
2358     int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2359     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2360
2361     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2362
2363     if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2364         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2365             lc->first_qp_group = 1;
2366         lc->end_of_tiles_x = s->ps.sps->width;
2367     } else if (s->ps.pps->tiles_enabled_flag) {
2368         if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2369             int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2370             lc->end_of_tiles_x   = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2371             lc->first_qp_group   = 1;
2372         }
2373     } else {
2374         lc->end_of_tiles_x = s->ps.sps->width;
2375     }
2376
2377     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
2378
2379     lc->boundary_flags = 0;
2380     if (s->ps.pps->tiles_enabled_flag) {
2381         if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2382             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2383         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2384             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2385         if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2386             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2387         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2388             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2389     } else {
2390         if (ctb_addr_in_slice <= 0)
2391             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2392         if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2393             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2394     }
2395
2396     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2397     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2398     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2399     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
2400 }
2401
2402 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2403 {
2404     HEVCContext *s  = avctxt->priv_data;
2405     int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
2406     int more_data   = 1;
2407     int x_ctb       = 0;
2408     int y_ctb       = 0;
2409     int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2410     int ret;
2411
2412     if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2413         av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2414         return AVERROR_INVALIDDATA;
2415     }
2416
2417     if (s->sh.dependent_slice_segment_flag) {
2418         int prev_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2419         if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2420             av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2421             return AVERROR_INVALIDDATA;
2422         }
2423     }
2424
2425     while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2426         int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2427
2428         x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2429         y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2430         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2431
2432         ret = ff_hevc_cabac_init(s, ctb_addr_ts);
2433         if (ret < 0) {
2434             s->tab_slice_address[ctb_addr_rs] = -1;
2435             return ret;
2436         }
2437
2438         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2439
2440         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2441         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2442         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2443
2444         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2445         if (more_data < 0) {
2446             s->tab_slice_address[ctb_addr_rs] = -1;
2447             return more_data;
2448         }
2449
2450
2451         ctb_addr_ts++;
2452         ff_hevc_save_states(s, ctb_addr_ts);
2453         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2454     }
2455
2456     if (x_ctb + ctb_size >= s->ps.sps->width &&
2457         y_ctb + ctb_size >= s->ps.sps->height)
2458         ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2459
2460     return ctb_addr_ts;
2461 }
2462
2463 static int hls_slice_data(HEVCContext *s)
2464 {
2465     int arg[2];
2466     int ret[2];
2467
2468     arg[0] = 0;
2469     arg[1] = 1;
2470
2471     s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
2472     return ret[0];
2473 }
2474 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2475 {
2476     HEVCContext *s1  = avctxt->priv_data, *s;
2477     HEVCLocalContext *lc;
2478     int ctb_size    = 1<< s1->ps.sps->log2_ctb_size;
2479     int more_data   = 1;
2480     int *ctb_row_p    = input_ctb_row;
2481     int ctb_row = ctb_row_p[job];
2482     int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->ps.sps->width + ctb_size - 1) >> s1->ps.sps->log2_ctb_size);
2483     int ctb_addr_ts = s1->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2484     int thread = ctb_row % s1->threads_number;
2485     int ret;
2486
2487     s = s1->sList[self_id];
2488     lc = s->HEVClc;
2489
2490     if(ctb_row) {
2491         ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2492         if (ret < 0)
2493             goto error;
2494         ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2495     }
2496
2497     while(more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2498         int x_ctb = (ctb_addr_rs % s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2499         int y_ctb = (ctb_addr_rs / s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2500
2501         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2502
2503         ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
2504
2505         if (atomic_load(&s1->wpp_err)) {
2506             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2507             return 0;
2508         }
2509
2510         ret = ff_hevc_cabac_init(s, ctb_addr_ts);
2511         if (ret < 0)
2512             goto error;
2513         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2514         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2515
2516         if (more_data < 0) {
2517             ret = more_data;
2518             goto error;
2519         }
2520
2521         ctb_addr_ts++;
2522
2523         ff_hevc_save_states(s, ctb_addr_ts);
2524         ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2525         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2526
2527         if (!more_data && (x_ctb+ctb_size) < s->ps.sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2528             atomic_store(&s1->wpp_err, 1);
2529             ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2530             return 0;
2531         }
2532
2533         if ((x_ctb+ctb_size) >= s->ps.sps->width && (y_ctb+ctb_size) >= s->ps.sps->height ) {
2534             ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2535             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2536             return ctb_addr_ts;
2537         }
2538         ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2539         x_ctb+=ctb_size;
2540
2541         if(x_ctb >= s->ps.sps->width) {
2542             break;
2543         }
2544     }
2545     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2546
2547     return 0;
2548 error:
2549     s->tab_slice_address[ctb_addr_rs] = -1;
2550     atomic_store(&s1->wpp_err, 1);
2551     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2552     return ret;
2553 }
2554
2555 static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal)
2556 {
2557     const uint8_t *data = nal->data;
2558     int length          = nal->size;
2559     HEVCLocalContext *lc = s->HEVClc;
2560     int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2561     int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2562     int64_t offset;
2563     int64_t startheader, cmpt = 0;
2564     int i, j, res = 0;
2565
2566     if (!ret || !arg) {
2567         av_free(ret);
2568         av_free(arg);
2569         return AVERROR(ENOMEM);
2570     }
2571
2572     if (s->sh.slice_ctb_addr_rs + s->sh.num_entry_point_offsets * s->ps.sps->ctb_width >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
2573         av_log(s->avctx, AV_LOG_ERROR, "WPP ctb addresses are wrong (%d %d %d %d)\n",
2574             s->sh.slice_ctb_addr_rs, s->sh.num_entry_point_offsets,
2575             s->ps.sps->ctb_width, s->ps.sps->ctb_height
2576         );
2577         res = AVERROR_INVALIDDATA;
2578         goto error;
2579     }
2580
2581     ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
2582
2583     if (!s->sList[1]) {
2584         for (i = 1; i < s->threads_number; i++) {
2585             s->sList[i] = av_malloc(sizeof(HEVCContext));
2586             memcpy(s->sList[i], s, sizeof(HEVCContext));
2587             s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2588             s->sList[i]->HEVClc = s->HEVClcList[i];
2589         }
2590     }
2591
2592     offset = (lc->gb.index >> 3);
2593
2594     for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) {
2595         if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2596             startheader--;
2597             cmpt++;
2598         }
2599     }
2600
2601     for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2602         offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2603         for (j = 0, cmpt = 0, startheader = offset
2604              + s->sh.entry_point_offset[i]; j < nal->skipped_bytes; j++) {
2605             if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2606                 startheader--;
2607                 cmpt++;
2608             }
2609         }
2610         s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2611         s->sh.offset[i - 1] = offset;
2612
2613     }
2614     if (s->sh.num_entry_point_offsets != 0) {
2615         offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2616         if (length < offset) {
2617             av_log(s->avctx, AV_LOG_ERROR, "entry_point_offset table is corrupted\n");
2618             res = AVERROR_INVALIDDATA;
2619             goto error;
2620         }
2621         s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2622         s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
2623
2624     }
2625     s->data = data;
2626
2627     for (i = 1; i < s->threads_number; i++) {
2628         s->sList[i]->HEVClc->first_qp_group = 1;
2629         s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2630         memcpy(s->sList[i], s, sizeof(HEVCContext));
2631         s->sList[i]->HEVClc = s->HEVClcList[i];
2632     }
2633
2634     atomic_store(&s->wpp_err, 0);
2635     ff_reset_entries(s->avctx);
2636
2637     for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2638         arg[i] = i;
2639         ret[i] = 0;
2640     }
2641
2642     if (s->ps.pps->entropy_coding_sync_enabled_flag)
2643         s->avctx->execute2(s->avctx, hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2644
2645     for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
2646         res += ret[i];
2647 error:
2648     av_free(ret);
2649     av_free(arg);
2650     return res;
2651 }
2652
2653 static int set_side_data(HEVCContext *s)
2654 {
2655     AVFrame *out = s->ref->frame;
2656
2657     if (s->sei.frame_packing.present &&
2658         s->sei.frame_packing.arrangement_type >= 3 &&
2659         s->sei.frame_packing.arrangement_type <= 5 &&
2660         s->sei.frame_packing.content_interpretation_type > 0 &&
2661         s->sei.frame_packing.content_interpretation_type < 3) {
2662         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2663         if (!stereo)
2664             return AVERROR(ENOMEM);
2665
2666         switch (s->sei.frame_packing.arrangement_type) {
2667         case 3:
2668             if (s->sei.frame_packing.quincunx_subsampling)
2669                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2670             else
2671                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2672             break;
2673         case 4:
2674             stereo->type = AV_STEREO3D_TOPBOTTOM;
2675             break;
2676         case 5:
2677             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2678             break;
2679         }
2680
2681         if (s->sei.frame_packing.content_interpretation_type == 2)
2682             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2683
2684         if (s->sei.frame_packing.arrangement_type == 5) {
2685             if (s->sei.frame_packing.current_frame_is_frame0_flag)
2686                 stereo->view = AV_STEREO3D_VIEW_LEFT;
2687             else
2688                 stereo->view = AV_STEREO3D_VIEW_RIGHT;
2689         }
2690     }
2691
2692     if (s->sei.display_orientation.present &&
2693         (s->sei.display_orientation.anticlockwise_rotation ||
2694          s->sei.display_orientation.hflip || s->sei.display_orientation.vflip)) {
2695         double angle = s->sei.display_orientation.anticlockwise_rotation * 360 / (double) (1 << 16);
2696         AVFrameSideData *rotation = av_frame_new_side_data(out,
2697                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2698                                                            sizeof(int32_t) * 9);
2699         if (!rotation)
2700             return AVERROR(ENOMEM);
2701
2702         av_display_rotation_set((int32_t *)rotation->data, angle);
2703         av_display_matrix_flip((int32_t *)rotation->data,
2704                                s->sei.display_orientation.hflip,
2705                                s->sei.display_orientation.vflip);
2706     }
2707
2708     // Decrement the mastering display flag when IRAP frame has no_rasl_output_flag=1
2709     // so the side data persists for the entire coded video sequence.
2710     if (s->sei.mastering_display.present > 0 &&
2711         IS_IRAP(s) && s->no_rasl_output_flag) {
2712         s->sei.mastering_display.present--;
2713     }
2714     if (s->sei.mastering_display.present) {
2715         // HEVC uses a g,b,r ordering, which we convert to a more natural r,g,b
2716         const int mapping[3] = {2, 0, 1};
2717         const int chroma_den = 50000;
2718         const int luma_den = 10000;
2719         int i;
2720         AVMasteringDisplayMetadata *metadata =
2721             av_mastering_display_metadata_create_side_data(out);
2722         if (!metadata)
2723             return AVERROR(ENOMEM);
2724
2725         for (i = 0; i < 3; i++) {
2726             const int j = mapping[i];
2727             metadata->display_primaries[i][0].num = s->sei.mastering_display.display_primaries[j][0];
2728             metadata->display_primaries[i][0].den = chroma_den;
2729             metadata->display_primaries[i][1].num = s->sei.mastering_display.display_primaries[j][1];
2730             metadata->display_primaries[i][1].den = chroma_den;
2731         }
2732         metadata->white_point[0].num = s->sei.mastering_display.white_point[0];
2733         metadata->white_point[0].den = chroma_den;
2734         metadata->white_point[1].num = s->sei.mastering_display.white_point[1];
2735         metadata->white_point[1].den = chroma_den;
2736
2737         metadata->max_luminance.num = s->sei.mastering_display.max_luminance;
2738         metadata->max_luminance.den = luma_den;
2739         metadata->min_luminance.num = s->sei.mastering_display.min_luminance;
2740         metadata->min_luminance.den = luma_den;
2741         metadata->has_luminance = 1;
2742         metadata->has_primaries = 1;
2743
2744         av_log(s->avctx, AV_LOG_DEBUG, "Mastering Display Metadata:\n");
2745         av_log(s->avctx, AV_LOG_DEBUG,
2746                "r(%5.4f,%5.4f) g(%5.4f,%5.4f) b(%5.4f %5.4f) wp(%5.4f, %5.4f)\n",
2747                av_q2d(metadata->display_primaries[0][0]),
2748                av_q2d(metadata->display_primaries[0][1]),
2749                av_q2d(metadata->display_primaries[1][0]),
2750                av_q2d(metadata->display_primaries[1][1]),
2751                av_q2d(metadata->display_primaries[2][0]),
2752                av_q2d(metadata->display_primaries[2][1]),
2753                av_q2d(metadata->white_point[0]), av_q2d(metadata->white_point[1]));
2754         av_log(s->avctx, AV_LOG_DEBUG,
2755                "min_luminance=%f, max_luminance=%f\n",
2756                av_q2d(metadata->min_luminance), av_q2d(metadata->max_luminance));
2757     }
2758     // Decrement the content light flag when IRAP frame has no_rasl_output_flag=1
2759     // so the side data persists for the entire coded video sequence.
2760     if (s->sei.content_light.present > 0 &&
2761         IS_IRAP(s) && s->no_rasl_output_flag) {
2762         s->sei.content_light.present--;
2763     }
2764     if (s->sei.content_light.present) {
2765         AVContentLightMetadata *metadata =
2766             av_content_light_metadata_create_side_data(out);
2767         if (!metadata)
2768             return AVERROR(ENOMEM);
2769         metadata->MaxCLL  = s->sei.content_light.max_content_light_level;
2770         metadata->MaxFALL = s->sei.content_light.max_pic_average_light_level;
2771
2772         av_log(s->avctx, AV_LOG_DEBUG, "Content Light Level Metadata:\n");
2773         av_log(s->avctx, AV_LOG_DEBUG, "MaxCLL=%d, MaxFALL=%d\n",
2774                metadata->MaxCLL, metadata->MaxFALL);
2775     }
2776
2777     if (s->sei.a53_caption.a53_caption) {
2778         AVFrameSideData* sd = av_frame_new_side_data(out,
2779                                                      AV_FRAME_DATA_A53_CC,
2780                                                      s->sei.a53_caption.a53_caption_size);
2781         if (sd)
2782             memcpy(sd->data, s->sei.a53_caption.a53_caption, s->sei.a53_caption.a53_caption_size);
2783         av_freep(&s->sei.a53_caption.a53_caption);
2784         s->sei.a53_caption.a53_caption_size = 0;
2785         s->avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
2786     }
2787
2788     return 0;
2789 }
2790
2791 static int hevc_frame_start(HEVCContext *s)
2792 {
2793     HEVCLocalContext *lc = s->HEVClc;
2794     int pic_size_in_ctb  = ((s->ps.sps->width  >> s->ps.sps->log2_min_cb_size) + 1) *
2795                            ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
2796     int ret;
2797
2798     memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2799     memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
2800     memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2801     memset(s->is_pcm,        0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
2802     memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2803
2804     s->is_decoded        = 0;
2805     s->first_nal_type    = s->nal_unit_type;
2806
2807     s->no_rasl_output_flag = IS_IDR(s) || IS_BLA(s) || (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos);
2808
2809     if (s->ps.pps->tiles_enabled_flag)
2810         lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2811
2812     ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2813     if (ret < 0)
2814         goto fail;
2815
2816     ret = ff_hevc_frame_rps(s);
2817     if (ret < 0) {
2818         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2819         goto fail;
2820     }
2821
2822     s->ref->frame->key_frame = IS_IRAP(s);
2823
2824     ret = set_side_data(s);
2825     if (ret < 0)
2826         goto fail;
2827
2828     s->frame->pict_type = 3 - s->sh.slice_type;
2829
2830     if (!IS_IRAP(s))
2831         ff_hevc_bump_frame(s);
2832
2833     av_frame_unref(s->output_frame);
2834     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2835     if (ret < 0)
2836         goto fail;
2837
2838     if (!s->avctx->hwaccel)
2839         ff_thread_finish_setup(s->avctx);
2840
2841     return 0;
2842
2843 fail:
2844     if (s->ref)
2845         ff_hevc_unref_frame(s, s->ref, ~0);
2846     s->ref = NULL;
2847     return ret;
2848 }
2849
2850 static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
2851 {
2852     HEVCLocalContext *lc = s->HEVClc;
2853     GetBitContext *gb    = &lc->gb;
2854     int ctb_addr_ts, ret;
2855
2856     *gb              = nal->gb;
2857     s->nal_unit_type = nal->type;
2858     s->temporal_id   = nal->temporal_id;
2859
2860     switch (s->nal_unit_type) {
2861     case HEVC_NAL_VPS:
2862         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2863             ret = s->avctx->hwaccel->decode_params(s->avctx,
2864                                                    nal->type,
2865                                                    nal->raw_data,
2866                                                    nal->raw_size);
2867             if (ret < 0)
2868                 goto fail;
2869         }
2870         ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2871         if (ret < 0)
2872             goto fail;
2873         break;
2874     case HEVC_NAL_SPS:
2875         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2876             ret = s->avctx->hwaccel->decode_params(s->avctx,
2877                                                    nal->type,
2878                                                    nal->raw_data,
2879                                                    nal->raw_size);
2880             if (ret < 0)
2881                 goto fail;
2882         }
2883         ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2884                                      s->apply_defdispwin);
2885         if (ret < 0)
2886             goto fail;
2887         break;
2888     case HEVC_NAL_PPS:
2889         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2890             ret = s->avctx->hwaccel->decode_params(s->avctx,
2891                                                    nal->type,
2892                                                    nal->raw_data,
2893                                                    nal->raw_size);
2894             if (ret < 0)
2895                 goto fail;
2896         }
2897         ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2898         if (ret < 0)
2899             goto fail;
2900         break;
2901     case HEVC_NAL_SEI_PREFIX:
2902     case HEVC_NAL_SEI_SUFFIX:
2903         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2904             ret = s->avctx->hwaccel->decode_params(s->avctx,
2905                                                    nal->type,
2906                                                    nal->raw_data,
2907                                                    nal->raw_size);
2908             if (ret < 0)
2909                 goto fail;
2910         }
2911         ret = ff_hevc_decode_nal_sei(gb, s->avctx, &s->sei, &s->ps, s->nal_unit_type);
2912         if (ret < 0)
2913             goto fail;
2914         break;
2915     case HEVC_NAL_TRAIL_R:
2916     case HEVC_NAL_TRAIL_N:
2917     case HEVC_NAL_TSA_N:
2918     case HEVC_NAL_TSA_R:
2919     case HEVC_NAL_STSA_N:
2920     case HEVC_NAL_STSA_R:
2921     case HEVC_NAL_BLA_W_LP:
2922     case HEVC_NAL_BLA_W_RADL:
2923     case HEVC_NAL_BLA_N_LP:
2924     case HEVC_NAL_IDR_W_RADL:
2925     case HEVC_NAL_IDR_N_LP:
2926     case HEVC_NAL_CRA_NUT:
2927     case HEVC_NAL_RADL_N:
2928     case HEVC_NAL_RADL_R:
2929     case HEVC_NAL_RASL_N:
2930     case HEVC_NAL_RASL_R:
2931         ret = hls_slice_header(s);
2932         if (ret < 0)
2933             return ret;
2934         if (ret == 1) {
2935             ret = AVERROR_INVALIDDATA;
2936             goto fail;
2937         }
2938
2939
2940         if (
2941             (s->avctx->skip_frame >= AVDISCARD_BIDIR && s->sh.slice_type == HEVC_SLICE_B) ||
2942             (s->avctx->skip_frame >= AVDISCARD_NONINTRA && s->sh.slice_type != HEVC_SLICE_I) ||
2943             (s->avctx->skip_frame >= AVDISCARD_NONKEY && !IS_IRAP(s))) {
2944             break;
2945         }
2946
2947         if (s->sh.first_slice_in_pic_flag) {
2948             if (s->max_ra == INT_MAX) {
2949                 if (s->nal_unit_type == HEVC_NAL_CRA_NUT || IS_BLA(s)) {
2950                     s->max_ra = s->poc;
2951                 } else {
2952                     if (IS_IDR(s))
2953                         s->max_ra = INT_MIN;
2954                 }
2955             }
2956
2957             if ((s->nal_unit_type == HEVC_NAL_RASL_R || s->nal_unit_type == HEVC_NAL_RASL_N) &&
2958                 s->poc <= s->max_ra) {
2959                 s->is_decoded = 0;
2960                 break;
2961             } else {
2962                 if (s->nal_unit_type == HEVC_NAL_RASL_R && s->poc > s->max_ra)
2963                     s->max_ra = INT_MIN;
2964             }
2965
2966             s->overlap ++;
2967             ret = hevc_frame_start(s);
2968             if (ret < 0)
2969                 return ret;
2970         } else if (!s->ref) {
2971             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2972             goto fail;
2973         }
2974
2975         if (s->nal_unit_type != s->first_nal_type) {
2976             av_log(s->avctx, AV_LOG_ERROR,
2977                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2978                    s->first_nal_type, s->nal_unit_type);
2979             return AVERROR_INVALIDDATA;
2980         }
2981
2982         if (!s->sh.dependent_slice_segment_flag &&
2983             s->sh.slice_type != HEVC_SLICE_I) {
2984             ret = ff_hevc_slice_rpl(s);
2985             if (ret < 0) {
2986                 av_log(s->avctx, AV_LOG_WARNING,
2987                        "Error constructing the reference lists for the current slice.\n");
2988                 goto fail;
2989             }
2990         }
2991
2992         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2993             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2994             if (ret < 0)
2995                 goto fail;
2996         }
2997
2998         if (s->avctx->hwaccel) {
2999             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
3000             if (ret < 0)
3001                 goto fail;
3002         } else {
3003             if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
3004                 ctb_addr_ts = hls_slice_data_wpp(s, nal);
3005             else
3006                 ctb_addr_ts = hls_slice_data(s);
3007             if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
3008                 s->is_decoded = 1;
3009             }
3010
3011             if (ctb_addr_ts < 0) {
3012                 ret = ctb_addr_ts;
3013                 goto fail;
3014             }
3015         }
3016         break;
3017     case HEVC_NAL_EOS_NUT:
3018     case HEVC_NAL_EOB_NUT:
3019         s->seq_decode = (s->seq_decode + 1) & 0xff;
3020         s->max_ra     = INT_MAX;
3021         break;
3022     case HEVC_NAL_AUD:
3023     case HEVC_NAL_FD_NUT:
3024         break;
3025     default:
3026         av_log(s->avctx, AV_LOG_INFO,
3027                "Skipping NAL unit %d\n", s->nal_unit_type);
3028     }
3029
3030     return 0;
3031 fail:
3032     if (s->avctx->err_recognition & AV_EF_EXPLODE)
3033         return ret;
3034     return 0;
3035 }
3036
3037 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
3038 {
3039     int i, ret = 0;
3040     int eos_at_start = 1;
3041
3042     s->ref = NULL;
3043     s->last_eos = s->eos;
3044     s->eos = 0;
3045     s->overlap = 0;
3046
3047     /* split the input packet into NAL units, so we know the upper bound on the
3048      * number of slices in the frame */
3049     ret = ff_h2645_packet_split(&s->pkt, buf, length, s->avctx, s->is_nalff,
3050                                 s->nal_length_size, s->avctx->codec_id, 1, 0);
3051     if (ret < 0) {
3052         av_log(s->avctx, AV_LOG_ERROR,
3053                "Error splitting the input into NAL units.\n");
3054         return ret;
3055     }
3056
3057     for (i = 0; i < s->pkt.nb_nals; i++) {
3058         if (s->pkt.nals[i].type == HEVC_NAL_EOB_NUT ||
3059             s->pkt.nals[i].type == HEVC_NAL_EOS_NUT) {
3060             if (eos_at_start) {
3061                 s->last_eos = 1;
3062             } else {
3063                 s->eos = 1;
3064             }
3065         } else {
3066             eos_at_start = 0;
3067         }
3068     }
3069
3070     /* decode the NAL units */
3071     for (i = 0; i < s->pkt.nb_nals; i++) {
3072         H2645NAL *nal = &s->pkt.nals[i];
3073
3074         if (s->avctx->skip_frame >= AVDISCARD_ALL ||
3075             (s->avctx->skip_frame >= AVDISCARD_NONREF
3076             && ff_hevc_nal_is_nonref(nal->type)))
3077             continue;
3078
3079         ret = decode_nal_unit(s, nal);
3080         if (ret >= 0 && s->overlap > 2)
3081             ret = AVERROR_INVALIDDATA;
3082         if (ret < 0) {
3083             av_log(s->avctx, AV_LOG_WARNING,
3084                    "Error parsing NAL unit #%d.\n", i);
3085             goto fail;
3086         }
3087     }
3088
3089 fail:
3090     if (s->ref && s->threads_type == FF_THREAD_FRAME)
3091         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
3092
3093     return ret;
3094 }
3095
/**
 * Log a 16-byte MD5 digest as 32 lowercase hex digits, without a newline.
 *
 * @param log_ctx context handed to av_log()
 * @param level   log level handed to av_log()
 * @param md5     digest bytes
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *cur       = md5;
    const uint8_t *const end = md5 + 16;

    while (cur < end)
        av_log(log_ctx, level, "%02"PRIx8, *cur++);
}
3102
3103 static int verify_md5(HEVCContext *s, AVFrame *frame)
3104 {
3105     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
3106     int pixel_shift;
3107     int i, j;
3108
3109     if (!desc)
3110         return AVERROR(EINVAL);
3111
3112     pixel_shift = desc->comp[0].depth > 8;
3113
3114     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
3115            s->poc);
3116
3117     /* the checksums are LE, so we have to byteswap for >8bpp formats
3118      * on BE arches */
3119 #if HAVE_BIGENDIAN
3120     if (pixel_shift && !s->checksum_buf) {
3121         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
3122                        FFMAX3(frame->linesize[0], frame->linesize[1],
3123                               frame->linesize[2]));
3124         if (!s->checksum_buf)
3125             return AVERROR(ENOMEM);
3126     }
3127 #endif
3128
3129     for (i = 0; frame->data[i]; i++) {
3130         int width  = s->avctx->coded_width;
3131         int height = s->avctx->coded_height;
3132         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
3133         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
3134         uint8_t md5[16];
3135
3136         av_md5_init(s->md5_ctx);
3137         for (j = 0; j < h; j++) {
3138             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
3139 #if HAVE_BIGENDIAN
3140             if (pixel_shift) {
3141                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
3142                                     (const uint16_t *) src, w);
3143                 src = s->checksum_buf;
3144             }
3145 #endif
3146             av_md5_update(s->md5_ctx, src, w << pixel_shift);
3147         }
3148         av_md5_final(s->md5_ctx, md5);
3149
3150         if (!memcmp(md5, s->sei.picture_hash.md5[i], 16)) {
3151             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
3152             print_md5(s->avctx, AV_LOG_DEBUG, md5);
3153             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
3154         } else {
3155             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
3156             print_md5(s->avctx, AV_LOG_ERROR, md5);
3157             av_log   (s->avctx, AV_LOG_ERROR, " != ");
3158             print_md5(s->avctx, AV_LOG_ERROR, s->sei.picture_hash.md5[i]);
3159             av_log   (s->avctx, AV_LOG_ERROR, "\n");
3160             return AVERROR_INVALIDDATA;
3161         }
3162     }
3163
3164     av_log(s->avctx, AV_LOG_DEBUG, "\n");
3165
3166     return 0;
3167 }
3168
3169 static int hevc_decode_extradata(HEVCContext *s, uint8_t *buf, int length, int first)
3170 {
3171     int ret, i;
3172
3173     ret = ff_hevc_decode_extradata(buf, length, &s->ps, &s->sei, &s->is_nalff,
3174                                    &s->nal_length_size, s->avctx->err_recognition,
3175                                    s->apply_defdispwin, s->avctx);
3176     if (ret < 0)
3177         return ret;
3178
3179     /* export stream parameters from the first SPS */
3180     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3181         if (first && s->ps.sps_list[i]) {
3182             const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
3183             export_stream_params(s, sps);
3184             break;
3185         }
3186     }
3187
3188     return 0;
3189 }
3190
3191 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
3192                              AVPacket *avpkt)
3193 {
3194     int ret;
3195     int new_extradata_size;
3196     uint8_t *new_extradata;
3197     HEVCContext *s = avctx->priv_data;
3198
3199     if (!avpkt->size) {
3200         ret = ff_hevc_output_frame(s, data, 1);
3201         if (ret < 0)
3202             return ret;
3203
3204         *got_output = ret;
3205         return 0;
3206     }
3207
3208     new_extradata = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA,
3209                                             &new_extradata_size);
3210     if (new_extradata && new_extradata_size > 0) {
3211         ret = hevc_decode_extradata(s, new_extradata, new_extradata_size, 0);
3212         if (ret < 0)
3213             return ret;
3214     }
3215
3216     s->ref = NULL;
3217     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
3218     if (ret < 0)
3219         return ret;
3220
3221     if (avctx->hwaccel) {
3222         if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) {
3223             av_log(avctx, AV_LOG_ERROR,
3224                    "hardware accelerator failed to decode picture\n");
3225             ff_hevc_unref_frame(s, s->ref, ~0);
3226             return ret;
3227         }
3228     } else {
3229         /* verify the SEI checksum */
3230         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
3231             s->sei.picture_hash.is_md5) {
3232             ret = verify_md5(s, s->ref->frame);
3233             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
3234                 ff_hevc_unref_frame(s, s->ref, ~0);
3235                 return ret;
3236             }
3237         }
3238     }
3239     s->sei.picture_hash.is_md5 = 0;
3240
3241     if (s->is_decoded) {
3242         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
3243         s->is_decoded = 0;
3244     }
3245
3246     if (s->output_frame->buf[0]) {
3247         av_frame_move_ref(data, s->output_frame);
3248         *got_output = 1;
3249     }
3250
3251     return avpkt->size;
3252 }
3253
3254 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
3255 {
3256     int ret;
3257
3258     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
3259     if (ret < 0)
3260         return ret;
3261
3262     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
3263     if (!dst->tab_mvf_buf)
3264         goto fail;
3265     dst->tab_mvf = src->tab_mvf;
3266
3267     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
3268     if (!dst->rpl_tab_buf)
3269         goto fail;
3270     dst->rpl_tab = src->rpl_tab;
3271
3272     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
3273     if (!dst->rpl_buf)
3274         goto fail;
3275
3276     dst->poc        = src->poc;
3277     dst->ctb_count  = src->ctb_count;
3278     dst->flags      = src->flags;
3279     dst->sequence   = src->sequence;
3280
3281     if (src->hwaccel_picture_private) {
3282         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
3283         if (!dst->hwaccel_priv_buf)
3284             goto fail;
3285         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
3286     }
3287
3288     return 0;
3289 fail:
3290     ff_hevc_unref_frame(s, dst, ~0);
3291     return AVERROR(ENOMEM);
3292 }
3293
3294 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3295 {
3296     HEVCContext       *s = avctx->priv_data;
3297     int i;
3298
3299     pic_arrays_free(s);
3300
3301     av_freep(&s->md5_ctx);
3302
3303     av_freep(&s->cabac_state);
3304
3305     for (i = 0; i < 3; i++) {
3306         av_freep(&s->sao_pixel_buffer_h[i]);
3307         av_freep(&s->sao_pixel_buffer_v[i]);
3308     }
3309     av_frame_free(&s->output_frame);
3310
3311     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3312         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3313         av_frame_free(&s->DPB[i].frame);
3314     }
3315
3316     ff_hevc_ps_uninit(&s->ps);
3317
3318     av_freep(&s->sh.entry_point_offset);
3319     av_freep(&s->sh.offset);
3320     av_freep(&s->sh.size);
3321
3322     for (i = 1; i < s->threads_number; i++) {
3323         HEVCLocalContext *lc = s->HEVClcList[i];
3324         if (lc) {
3325             av_freep(&s->HEVClcList[i]);
3326             av_freep(&s->sList[i]);
3327         }
3328     }
3329     if (s->HEVClc == s->HEVClcList[0])
3330         s->HEVClc = NULL;
3331     av_freep(&s->HEVClcList[0]);
3332
3333     ff_h2645_packet_uninit(&s->pkt);
3334
3335     return 0;
3336 }
3337
3338 static av_cold int hevc_init_context(AVCodecContext *avctx)
3339 {
3340     HEVCContext *s = avctx->priv_data;
3341     int i;
3342
3343     s->avctx = avctx;
3344
3345     s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3346     if (!s->HEVClc)
3347         goto fail;
3348     s->HEVClcList[0] = s->HEVClc;
3349     s->sList[0] = s;
3350
3351     s->cabac_state = av_malloc(HEVC_CONTEXTS);
3352     if (!s->cabac_state)
3353         goto fail;
3354
3355     s->output_frame = av_frame_alloc();
3356     if (!s->output_frame)
3357         goto fail;
3358
3359     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3360         s->DPB[i].frame = av_frame_alloc();
3361         if (!s->DPB[i].frame)
3362             goto fail;
3363         s->DPB[i].tf.f = s->DPB[i].frame;
3364     }
3365
3366     s->max_ra = INT_MAX;
3367
3368     s->md5_ctx = av_md5_alloc();
3369     if (!s->md5_ctx)
3370         goto fail;
3371
3372     ff_bswapdsp_init(&s->bdsp);
3373
3374     s->context_initialized = 1;
3375     s->eos = 0;
3376
3377     ff_hevc_reset_sei(&s->sei);
3378
3379     return 0;
3380
3381 fail:
3382     hevc_decode_free(avctx);
3383     return AVERROR(ENOMEM);
3384 }
3385
#if HAVE_THREADS
/**
 * Replace each entry of a buffer-reference list with a new reference to the
 * corresponding source entry (or leave it NULL when the source is NULL).
 *
 * @return 0 on success, AVERROR(ENOMEM) as soon as one reference fails
 */
static int hevc_sync_buffer_list(AVBufferRef **dst_list, AVBufferRef **src_list,
                                 int nb_entries)
{
    int idx;

    for (idx = 0; idx < nb_entries; idx++) {
        av_buffer_unref(&dst_list[idx]);
        if (!src_list[idx])
            continue;

        dst_list[idx] = av_buffer_ref(src_list[idx]);
        if (!dst_list[idx])
            return AVERROR(ENOMEM);
    }

    return 0;
}

/**
 * Frame-threading callback: bring the context of dst up to date with src.
 *
 * Re-references the DPB and the VPS/SPS/PPS lists, activates the source's
 * SPS when it differs, and copies sequence, NAL-format, threading and SEI
 * state.
 *
 * @param dst codec context to update
 * @param src codec context to copy from
 * @return 0 on success, a negative error code otherwise
 */
static int hevc_update_thread_context(AVCodecContext *dst,
                                      const AVCodecContext *src)
{
    HEVCContext *s  = dst->priv_data;
    HEVCContext *s0 = src->priv_data;
    int slot, ret;

    if (!s->context_initialized && (ret = hevc_init_context(dst)) < 0)
        return ret;

    for (slot = 0; slot < FF_ARRAY_ELEMS(s->DPB); slot++) {
        ff_hevc_unref_frame(s, &s->DPB[slot], ~0);
        if (!s0->DPB[slot].frame->buf[0])
            continue;

        ret = hevc_ref_frame(s, &s->DPB[slot], &s0->DPB[slot]);
        if (ret < 0)
            return ret;
    }

    /* forget the active SPS before the lists holding it are replaced */
    if (s->ps.sps != s0->ps.sps)
        s->ps.sps = NULL;

    ret = hevc_sync_buffer_list(s->ps.vps_list, s0->ps.vps_list,
                                FF_ARRAY_ELEMS(s->ps.vps_list));
    if (ret < 0)
        return ret;

    ret = hevc_sync_buffer_list(s->ps.sps_list, s0->ps.sps_list,
                                FF_ARRAY_ELEMS(s->ps.sps_list));
    if (ret < 0)
        return ret;

    ret = hevc_sync_buffer_list(s->ps.pps_list, s0->ps.pps_list,
                                FF_ARRAY_ELEMS(s->ps.pps_list));
    if (ret < 0)
        return ret;

    if (s->ps.sps != s0->ps.sps) {
        ret = set_sps(s, s0->ps.sps, src->pix_fmt);
        if (ret < 0)
            return ret;
    }

    s->seq_decode          = s0->seq_decode;
    s->seq_output          = s0->seq_output;
    s->pocTid0             = s0->pocTid0;
    s->max_ra              = s0->max_ra;
    s->eos                 = s0->eos;
    s->no_rasl_output_flag = s0->no_rasl_output_flag;

    s->is_nalff            = s0->is_nalff;
    s->nal_length_size     = s0->nal_length_size;

    s->threads_number      = s0->threads_number;
    s->threads_type        = s0->threads_type;

    /* same state change decode_nal_unit() applies on an EOS/EOB NAL unit */
    if (s0->eos) {
        s->seq_decode = (s->seq_decode + 1) & 0xff;
        s->max_ra     = INT_MAX;
    }

    s->sei.frame_packing        = s0->sei.frame_packing;
    s->sei.display_orientation  = s0->sei.display_orientation;
    s->sei.mastering_display    = s0->sei.mastering_display;
    s->sei.content_light        = s0->sei.content_light;
    s->sei.alternative_transfer = s0->sei.alternative_transfer;

    return 0;
}
#endif
3469
3470 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3471 {
3472     HEVCContext *s = avctx->priv_data;
3473     int ret;
3474
3475     avctx->internal->allocate_progress = 1;
3476
3477     ret = hevc_init_context(avctx);
3478     if (ret < 0)
3479         return ret;
3480
3481     s->enable_parallel_tiles = 0;
3482     s->sei.picture_timing.picture_struct = 0;
3483     s->eos = 1;
3484
3485     atomic_init(&s->wpp_err, 0);
3486
3487     if(avctx->active_thread_type & FF_THREAD_SLICE)
3488         s->threads_number = avctx->thread_count;
3489     else
3490         s->threads_number = 1;
3491
3492     if (avctx->extradata_size > 0 && avctx->extradata) {
3493         ret = hevc_decode_extradata(s, avctx->extradata, avctx->extradata_size, 1);
3494         if (ret < 0) {
3495             hevc_decode_free(avctx);
3496             return ret;
3497         }
3498     }
3499
3500     if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3501             s->threads_type = FF_THREAD_FRAME;
3502         else
3503             s->threads_type = FF_THREAD_SLICE;
3504
3505     return 0;
3506 }
3507
#if HAVE_THREADS
/**
 * Frame-threading callback: zero the context of a thread copy and run the
 * common context initialisation on it.
 *
 * @param avctx codec context of the thread copy
 * @return 0 on success, the negative error code of hevc_init_context()
 *         otherwise
 */
static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
{
    HEVCContext *s = avctx->priv_data;

    memset(s, 0, sizeof(*s));

    /* hevc_init_context() returns either 0 or a negative error code */
    return hevc_init_context(avctx);
}
#endif
3523
3524 static void hevc_decode_flush(AVCodecContext *avctx)
3525 {
3526     HEVCContext *s = avctx->priv_data;
3527     ff_hevc_flush_dpb(s);
3528     s->max_ra = INT_MAX;
3529     s->eos = 1;
3530 }
3531
3532 #define OFFSET(x) offsetof(HEVCContext, x)
3533 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3534
3535 static const AVOption options[] = {
3536     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3537         AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3538     { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3539         AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3540     { NULL },
3541 };
3542
3543 static const AVClass hevc_decoder_class = {
3544     .class_name = "HEVC decoder",
3545     .item_name  = av_default_item_name,
3546     .option     = options,
3547     .version    = LIBAVUTIL_VERSION_INT,
3548 };
3549
3550 AVCodec ff_hevc_decoder = {
3551     .name                  = "hevc",
3552     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3553     .type                  = AVMEDIA_TYPE_VIDEO,
3554     .id                    = AV_CODEC_ID_HEVC,
3555     .priv_data_size        = sizeof(HEVCContext),
3556     .priv_class            = &hevc_decoder_class,
3557     .init                  = hevc_decode_init,
3558     .close                 = hevc_decode_free,
3559     .decode                = hevc_decode_frame,
3560     .flush                 = hevc_decode_flush,
3561     .update_thread_context = ONLY_IF_THREADS_ENABLED(hevc_update_thread_context),
3562     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(hevc_init_thread_copy),
3563     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
3564                              AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
3565     .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_EXPORTS_CROPPING,
3566     .profiles              = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
3567     .hw_configs            = (const AVCodecHWConfigInternal*[]) {
3568 #if CONFIG_HEVC_DXVA2_HWACCEL
3569                                HWACCEL_DXVA2(hevc),
3570 #endif
3571 #if CONFIG_HEVC_D3D11VA_HWACCEL
3572                                HWACCEL_D3D11VA(hevc),
3573 #endif
3574 #if CONFIG_HEVC_D3D11VA2_HWACCEL
3575                                HWACCEL_D3D11VA2(hevc),
3576 #endif
3577 #if CONFIG_HEVC_NVDEC_HWACCEL
3578                                HWACCEL_NVDEC(hevc),
3579 #endif
3580 #if CONFIG_HEVC_VAAPI_HWACCEL
3581                                HWACCEL_VAAPI(hevc),
3582 #endif
3583 #if CONFIG_HEVC_VDPAU_HWACCEL
3584                                HWACCEL_VDPAU(hevc),
3585 #endif
3586 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
3587                                HWACCEL_VIDEOTOOLBOX(hevc),
3588 #endif
3589                                NULL
3590                            },
3591 };