git.sesse.net Git - ffmpeg/blob - libavcodec/hevcdec.c

   1 /*
   2  * HEVC video Decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of FFmpeg.
  10  *
  11  * FFmpeg is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * FFmpeg is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with FFmpeg; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/attributes.h"
  27 #include "libavutil/common.h"
  28 #include "libavutil/display.h"
  29 #include "libavutil/internal.h"
  30 #include "libavutil/mastering_display_metadata.h"
  31 #include "libavutil/md5.h"
  32 #include "libavutil/opt.h"
  33 #include "libavutil/pixdesc.h"
  34 #include "libavutil/stereo3d.h"
  35 #include "libavutil/timecode.h"
  36
  37 #include "bswapdsp.h"
  38 #include "bytestream.h"
  39 #include "cabac_functions.h"
  40 #include "golomb.h"
  41 #include "hevc.h"
  42 #include "hevc_data.h"
  43 #include "hevc_parse.h"
  44 #include "hevcdec.h"
  45 #include "hwconfig.h"
  46 #include "profiles.h"
  47
  48 const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
  49
  50 /**
  51  * NOTE: Each function hls_foo correspond to the function foo in the
  52  * specification (HLS stands for High Level Syntax).
  53  */
  54
  55 /**
  56  * Section 5.7
  57  */
  58
  59 /* free everything allocated  by pic_arrays_init() */
  60 static void pic_arrays_free(HEVCContext *s)
  61 {
  62     av_freep(&s->sao);
  63     av_freep(&s->deblock);
  64
  65     av_freep(&s->skip_flag);
  66     av_freep(&s->tab_ct_depth);
  67
  68     av_freep(&s->tab_ipm);
  69     av_freep(&s->cbf_luma);
  70     av_freep(&s->is_pcm);
  71
  72     av_freep(&s->qp_y_tab);
  73     av_freep(&s->tab_slice_address);
  74     av_freep(&s->filter_slice_edges);
  75
  76     av_freep(&s->horizontal_bs);
  77     av_freep(&s->vertical_bs);
  78
  79     av_freep(&s->sh.entry_point_offset);
  80     av_freep(&s->sh.size);
  81     av_freep(&s->sh.offset);
  82
  83     av_buffer_pool_uninit(&s->tab_mvf_pool);
  84     av_buffer_pool_uninit(&s->rpl_tab_pool);
  85 }
  86
  87 /* allocate arrays that depend on frame dimensions */
  88 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
  89 {
  90     int log2_min_cb_size = sps->log2_min_cb_size;
  91     int width            = sps->width;
  92     int height           = sps->height;
  93     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
  94                            ((height >> log2_min_cb_size) + 1);
  95     int ctb_count        = sps->ctb_width * sps->ctb_height;
  96     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
  97
  98     s->bs_width  = (width  >> 2) + 1;
  99     s->bs_height = (height >> 2) + 1;
 100
 101     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
 102     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
 103     if (!s->sao || !s->deblock)
 104         goto fail;
 105
 106     s->skip_flag    = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
 107     s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
 108     if (!s->skip_flag || !s->tab_ct_depth)
 109         goto fail;
 110
 111     s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
 112     s->tab_ipm  = av_mallocz(min_pu_size);
 113     s->is_pcm   = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
 114     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 115         goto fail;
 116
 117     s->filter_slice_edges = av_mallocz(ctb_count);
 118     s->tab_slice_address  = av_malloc_array(pic_size_in_ctb,
 119                                       sizeof(*s->tab_slice_address));
 120     s->qp_y_tab           = av_malloc_array(pic_size_in_ctb,
 121                                       sizeof(*s->qp_y_tab));
 122     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 123         goto fail;
 124
 125     s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
 126     s->vertical_bs   = av_mallocz_array(s->bs_width, s->bs_height);
 127     if (!s->horizontal_bs || !s->vertical_bs)
 128         goto fail;
 129
 130     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 131                                           av_buffer_allocz);
 132     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 133                                           av_buffer_allocz);
 134     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 135         goto fail;
 136
 137     return 0;
 138
 139 fail:
 140     pic_arrays_free(s);
 141     return AVERROR(ENOMEM);
 142 }
 143
 144 static int pred_weight_table(HEVCContext *s, GetBitContext *gb)
 145 {
 146     int i = 0;
 147     int j = 0;
 148     uint8_t luma_weight_l0_flag[16];
 149     uint8_t chroma_weight_l0_flag[16];
 150     uint8_t luma_weight_l1_flag[16];
 151     uint8_t chroma_weight_l1_flag[16];
 152     int luma_log2_weight_denom;
 153
 154     luma_log2_weight_denom = get_ue_golomb_long(gb);
 155     if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7) {
 156         av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
 157         return AVERROR_INVALIDDATA;
 158     }
 159     s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
 160     if (s->ps.sps->chroma_format_idc != 0) {
 161         int64_t chroma_log2_weight_denom = luma_log2_weight_denom + (int64_t)get_se_golomb(gb);
 162         if (chroma_log2_weight_denom < 0 || chroma_log2_weight_denom > 7) {
 163             av_log(s->avctx, AV_LOG_ERROR, "chroma_log2_weight_denom %"PRId64" is invalid\n", chroma_log2_weight_denom);
 164             return AVERROR_INVALIDDATA;
 165         }
 166         s->sh.chroma_log2_weight_denom = chroma_log2_weight_denom;
 167     }
 168
 169     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 170         luma_weight_l0_flag[i] = get_bits1(gb);
 171         if (!luma_weight_l0_flag[i]) {
 172             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 173             s->sh.luma_offset_l0[i] = 0;
 174         }
 175     }
 176     if (s->ps.sps->chroma_format_idc != 0) {
 177         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 178             chroma_weight_l0_flag[i] = get_bits1(gb);
 179     } else {
 180         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 181             chroma_weight_l0_flag[i] = 0;
 182     }
 183     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 184         if (luma_weight_l0_flag[i]) {
 185             int delta_luma_weight_l0 = get_se_golomb(gb);
 186             if ((int8_t)delta_luma_weight_l0 != delta_luma_weight_l0)
 187                 return AVERROR_INVALIDDATA;
 188             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 189             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 190         }
 191         if (chroma_weight_l0_flag[i]) {
 192             for (j = 0; j < 2; j++) {
 193                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 194                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 195
 196                 if (   (int8_t)delta_chroma_weight_l0 != delta_chroma_weight_l0
 197                     || delta_chroma_offset_l0 < -(1<<17) || delta_chroma_offset_l0 > (1<<17)) {
 198                     return AVERROR_INVALIDDATA;
 199                 }
 200
 201                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 202                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 203                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 204             }
 205         } else {
 206             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 207             s->sh.chroma_offset_l0[i][0] = 0;
 208             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 209             s->sh.chroma_offset_l0[i][1] = 0;
 210         }
 211     }
 212     if (s->sh.slice_type == HEVC_SLICE_B) {
 213         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 214             luma_weight_l1_flag[i] = get_bits1(gb);
 215             if (!luma_weight_l1_flag[i]) {
 216                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 217                 s->sh.luma_offset_l1[i] = 0;
 218             }
 219         }
 220         if (s->ps.sps->chroma_format_idc != 0) {
 221             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 222                 chroma_weight_l1_flag[i] = get_bits1(gb);
 223         } else {
 224             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 225                 chroma_weight_l1_flag[i] = 0;
 226         }
 227         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 228             if (luma_weight_l1_flag[i]) {
 229                 int delta_luma_weight_l1 = get_se_golomb(gb);
 230                 if ((int8_t)delta_luma_weight_l1 != delta_luma_weight_l1)
 231                     return AVERROR_INVALIDDATA;
 232                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 233                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 234             }
 235             if (chroma_weight_l1_flag[i]) {
 236                 for (j = 0; j < 2; j++) {
 237                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 238                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 239
 240                     if (   (int8_t)delta_chroma_weight_l1 != delta_chroma_weight_l1
 241                         || delta_chroma_offset_l1 < -(1<<17) || delta_chroma_offset_l1 > (1<<17)) {
 242                         return AVERROR_INVALIDDATA;
 243                     }
 244
 245                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 246                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 247                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 248                 }
 249             } else {
 250                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 251                 s->sh.chroma_offset_l1[i][0] = 0;
 252                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 253                 s->sh.chroma_offset_l1[i][1] = 0;
 254             }
 255         }
 256     }
 257     return 0;
 258 }
 259
 260 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 261 {
 262     const HEVCSPS *sps = s->ps.sps;
 263     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 264     int prev_delta_msb = 0;
 265     unsigned int nb_sps = 0, nb_sh;
 266     int i;
 267
 268     rps->nb_refs = 0;
 269     if (!sps->long_term_ref_pics_present_flag)
 270         return 0;
 271
 272     if (sps->num_long_term_ref_pics_sps > 0)
 273         nb_sps = get_ue_golomb_long(gb);
 274     nb_sh = get_ue_golomb_long(gb);
 275
 276     if (nb_sps > sps->num_long_term_ref_pics_sps)
 277         return AVERROR_INVALIDDATA;
 278     if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
 279         return AVERROR_INVALIDDATA;
 280
 281     rps->nb_refs = nb_sh + nb_sps;
 282
 283     for (i = 0; i < rps->nb_refs; i++) {
 284
 285         if (i < nb_sps) {
 286             uint8_t lt_idx_sps = 0;
 287
 288             if (sps->num_long_term_ref_pics_sps > 1)
 289                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 290
 291             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 292             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 293         } else {
 294             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 295             rps->used[i] = get_bits1(gb);
 296         }
 297
 298         rps->poc_msb_present[i] = get_bits1(gb);
 299         if (rps->poc_msb_present[i]) {
 300             int64_t delta = get_ue_golomb_long(gb);
 301             int64_t poc;
 302
 303             if (i && i != nb_sps)
 304                 delta += prev_delta_msb;
 305
 306             poc = rps->poc[i] + s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 307             if (poc != (int32_t)poc)
 308                 return AVERROR_INVALIDDATA;
 309             rps->poc[i] = poc;
 310             prev_delta_msb = delta;
 311         }
 312     }
 313
 314     return 0;
 315 }
 316
 317 static void export_stream_params(HEVCContext *s, const HEVCSPS *sps)
 318 {
 319     AVCodecContext *avctx = s->avctx;
 320     const HEVCParamSets *ps = &s->ps;
 321     const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
 322     const HEVCWindow *ow = &sps->output_window;
 323     unsigned int num = 0, den = 0;
 324
 325     avctx->pix_fmt             = sps->pix_fmt;
 326     avctx->coded_width         = sps->width;
 327     avctx->coded_height        = sps->height;
 328     avctx->width               = sps->width  - ow->left_offset - ow->right_offset;
 329     avctx->height              = sps->height - ow->top_offset  - ow->bottom_offset;
 330     avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 331     avctx->profile             = sps->ptl.general_ptl.profile_idc;
 332     avctx->level               = sps->ptl.general_ptl.level_idc;
 333
 334     ff_set_sar(avctx, sps->vui.sar);
 335
 336     if (sps->vui.video_signal_type_present_flag)
 337         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 338                                                             : AVCOL_RANGE_MPEG;
 339     else
 340         avctx->color_range = AVCOL_RANGE_MPEG;
 341
 342     if (sps->vui.colour_description_present_flag) {
 343         avctx->color_primaries = sps->vui.colour_primaries;
 344         avctx->color_trc       = sps->vui.transfer_characteristic;
 345         avctx->colorspace      = sps->vui.matrix_coeffs;
 346     } else {
 347         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 348         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 349         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 350     }
 351
 352     avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED;
 353     if (sps->chroma_format_idc == 1) {
 354         if (sps->vui.chroma_loc_info_present_flag) {
 355             if (sps->vui.chroma_sample_loc_type_top_field <= 5)
 356                 avctx->chroma_sample_location = sps->vui.chroma_sample_loc_type_top_field + 1;
 357         } else
 358             avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
 359     }
 360
 361     if (vps->vps_timing_info_present_flag) {
 362         num = vps->vps_num_units_in_tick;
 363         den = vps->vps_time_scale;
 364     } else if (sps->vui.vui_timing_info_present_flag) {
 365         num = sps->vui.vui_num_units_in_tick;
 366         den = sps->vui.vui_time_scale;
 367     }
 368
 369     if (num != 0 && den != 0)
 370         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
 371                   num, den, 1 << 30);
 372 }
 373
 374 static int export_stream_params_from_sei(HEVCContext *s)
 375 {
 376     AVCodecContext *avctx = s->avctx;
 377
 378     if (s->sei.a53_caption.buf_ref)
 379         s->avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
 380
 381     if (s->sei.alternative_transfer.present &&
 382         av_color_transfer_name(s->sei.alternative_transfer.preferred_transfer_characteristics) &&
 383         s->sei.alternative_transfer.preferred_transfer_characteristics != AVCOL_TRC_UNSPECIFIED) {
 384         avctx->color_trc = s->sei.alternative_transfer.preferred_transfer_characteristics;
 385     }
 386
 387     return 0;
 388 }
 389
 390 static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
 391 {
 392 #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + \
 393                      CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \
 394                      CONFIG_HEVC_NVDEC_HWACCEL + \
 395                      CONFIG_HEVC_VAAPI_HWACCEL + \
 396                      CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \
 397                      CONFIG_HEVC_VDPAU_HWACCEL)
 398     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
 399
 400     switch (sps->pix_fmt) {
 401     case AV_PIX_FMT_YUV420P:
 402     case AV_PIX_FMT_YUVJ420P:
 403 #if CONFIG_HEVC_DXVA2_HWACCEL
 404         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
 405 #endif
 406 #if CONFIG_HEVC_D3D11VA_HWACCEL
 407         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
 408         *fmt++ = AV_PIX_FMT_D3D11;
 409 #endif
 410 #if CONFIG_HEVC_VAAPI_HWACCEL
 411         *fmt++ = AV_PIX_FMT_VAAPI;
 412 #endif
 413 #if CONFIG_HEVC_VDPAU_HWACCEL
 414         *fmt++ = AV_PIX_FMT_VDPAU;
 415 #endif
 416 #if CONFIG_HEVC_NVDEC_HWACCEL
 417         *fmt++ = AV_PIX_FMT_CUDA;
 418 #endif
 419 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
 420         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
 421 #endif
 422         break;
 423     case AV_PIX_FMT_YUV420P10:
 424 #if CONFIG_HEVC_DXVA2_HWACCEL
 425         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
 426 #endif
 427 #if CONFIG_HEVC_D3D11VA_HWACCEL
 428         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
 429         *fmt++ = AV_PIX_FMT_D3D11;
 430 #endif
 431 #if CONFIG_HEVC_VAAPI_HWACCEL
 432         *fmt++ = AV_PIX_FMT_VAAPI;
 433 #endif
 434 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
 435         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
 436 #endif
 437 #if CONFIG_HEVC_VDPAU_HWACCEL
 438         *fmt++ = AV_PIX_FMT_VDPAU;
 439 #endif
 440 #if CONFIG_HEVC_NVDEC_HWACCEL
 441         *fmt++ = AV_PIX_FMT_CUDA;
 442 #endif
 443         break;
 444     case AV_PIX_FMT_YUV444P:
 445 #if CONFIG_HEVC_VDPAU_HWACCEL
 446         *fmt++ = AV_PIX_FMT_VDPAU;
 447 #endif
 448 #if CONFIG_HEVC_NVDEC_HWACCEL
 449         *fmt++ = AV_PIX_FMT_CUDA;
 450 #endif
 451         break;
 452     case AV_PIX_FMT_YUV422P:
 453     case AV_PIX_FMT_YUV422P10LE:
 454 #if CONFIG_HEVC_VAAPI_HWACCEL
 455        *fmt++ = AV_PIX_FMT_VAAPI;
 456 #endif
 457         break;
 458     case AV_PIX_FMT_YUV420P12:
 459     case AV_PIX_FMT_YUV444P10:
 460     case AV_PIX_FMT_YUV444P12:
 461 #if CONFIG_HEVC_VDPAU_HWACCEL
 462         *fmt++ = AV_PIX_FMT_VDPAU;
 463 #endif
 464 #if CONFIG_HEVC_NVDEC_HWACCEL
 465         *fmt++ = AV_PIX_FMT_CUDA;
 466 #endif
 467         break;
 468     }
 469
 470     *fmt++ = sps->pix_fmt;
 471     *fmt = AV_PIX_FMT_NONE;
 472
 473     return ff_thread_get_format(s->avctx, pix_fmts);
 474 }
 475
 476 static int set_sps(HEVCContext *s, const HEVCSPS *sps,
 477                    enum AVPixelFormat pix_fmt)
 478 {
 479     int ret, i;
 480
 481     pic_arrays_free(s);
 482     s->ps.sps = NULL;
 483     s->ps.vps = NULL;
 484
 485     if (!sps)
 486         return 0;
 487
 488     ret = pic_arrays_init(s, sps);
 489     if (ret < 0)
 490         goto fail;
 491
 492     export_stream_params(s, sps);
 493
 494     s->avctx->pix_fmt = pix_fmt;
 495
 496     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 497     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 498     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 499
 500     for (i = 0; i < 3; i++) {
 501         av_freep(&s->sao_pixel_buffer_h[i]);
 502         av_freep(&s->sao_pixel_buffer_v[i]);
 503     }
 504
 505     if (sps->sao_enabled && !s->avctx->hwaccel) {
 506         int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
 507         int c_idx;
 508
 509         for(c_idx = 0; c_idx < c_count; c_idx++) {
 510             int w = sps->width >> sps->hshift[c_idx];
 511             int h = sps->height >> sps->vshift[c_idx];
 512             s->sao_pixel_buffer_h[c_idx] =
 513                 av_malloc((w * 2 * sps->ctb_height) <<
 514                           sps->pixel_shift);
 515             s->sao_pixel_buffer_v[c_idx] =
 516                 av_malloc((h * 2 * sps->ctb_width) <<
 517                           sps->pixel_shift);
 518             if (!s->sao_pixel_buffer_h[c_idx] ||
 519                 !s->sao_pixel_buffer_v[c_idx])
 520                 goto fail;
 521         }
 522     }
 523
 524     s->ps.sps = sps;
 525     s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
 526
 527     return 0;
 528
 529 fail:
 530     pic_arrays_free(s);
 531     for (i = 0; i < 3; i++) {
 532         av_freep(&s->sao_pixel_buffer_h[i]);
 533         av_freep(&s->sao_pixel_buffer_v[i]);
 534     }
 535     s->ps.sps = NULL;
 536     return ret;
 537 }
 538
 539 static int hls_slice_header(HEVCContext *s)
 540 {
 541     GetBitContext *gb = &s->HEVClc->gb;
 542     SliceHeader *sh   = &s->sh;
 543     int i, ret;
 544
 545     // Coded parameters
 546     sh->first_slice_in_pic_flag = get_bits1(gb);
 547     if (s->ref && sh->first_slice_in_pic_flag) {
 548         av_log(s->avctx, AV_LOG_ERROR, "Two slices reporting being the first in the same frame.\n");
 549         return 1; // This slice will be skipped later, do not corrupt state
 550     }
 551
 552     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 553         s->seq_decode = (s->seq_decode + 1) & 0xff;
 554         s->max_ra     = INT_MAX;
 555         if (IS_IDR(s))
 556             ff_hevc_clear_refs(s);
 557     }
 558     sh->no_output_of_prior_pics_flag = 0;
 559     if (IS_IRAP(s))
 560         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 561
 562     sh->pps_id = get_ue_golomb_long(gb);
 563     if (sh->pps_id >= HEVC_MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
 564         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 565         return AVERROR_INVALIDDATA;
 566     }
 567     if (!sh->first_slice_in_pic_flag &&
 568         s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
 569         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 570         return AVERROR_INVALIDDATA;
 571     }
 572     s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
 573     if (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos == 1)
 574         sh->no_output_of_prior_pics_flag = 1;
 575
 576     if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
 577         const HEVCSPS *sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
 578         const HEVCSPS *last_sps = s->ps.sps;
 579         enum AVPixelFormat pix_fmt;
 580
 581         if (last_sps && IS_IRAP(s) && s->nal_unit_type != HEVC_NAL_CRA_NUT) {
 582             if (sps->width != last_sps->width || sps->height != last_sps->height ||
 583                 sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering !=
 584                 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
 585                 sh->no_output_of_prior_pics_flag = 0;
 586         }
 587         ff_hevc_clear_refs(s);
 588
 589         ret = set_sps(s, sps, sps->pix_fmt);
 590         if (ret < 0)
 591             return ret;
 592
 593         pix_fmt = get_format(s, sps);
 594         if (pix_fmt < 0)
 595             return pix_fmt;
 596         s->avctx->pix_fmt = pix_fmt;
 597
 598         s->seq_decode = (s->seq_decode + 1) & 0xff;
 599         s->max_ra     = INT_MAX;
 600     }
 601
 602     ret = export_stream_params_from_sei(s);
 603     if (ret < 0)
 604         return ret;
 605
 606     sh->dependent_slice_segment_flag = 0;
 607     if (!sh->first_slice_in_pic_flag) {
 608         int slice_address_length;
 609
 610         if (s->ps.pps->dependent_slice_segments_enabled_flag)
 611             sh->dependent_slice_segment_flag = get_bits1(gb);
 612
 613         slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
 614                                             s->ps.sps->ctb_height);
 615         sh->slice_segment_addr = get_bitsz(gb, slice_address_length);
 616         if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
 617             av_log(s->avctx, AV_LOG_ERROR,
 618                    "Invalid slice segment address: %u.\n",
 619                    sh->slice_segment_addr);
 620             return AVERROR_INVALIDDATA;
 621         }
 622
 623         if (!sh->dependent_slice_segment_flag) {
 624             sh->slice_addr = sh->slice_segment_addr;
 625             s->slice_idx++;
 626         }
 627     } else {
 628         sh->slice_segment_addr = sh->slice_addr = 0;
 629         s->slice_idx           = 0;
 630         s->slice_initialized   = 0;
 631     }
 632
 633     if (!sh->dependent_slice_segment_flag) {
 634         s->slice_initialized = 0;
 635
 636         for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
 637             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 638
 639         sh->slice_type = get_ue_golomb_long(gb);
 640         if (!(sh->slice_type == HEVC_SLICE_I ||
 641               sh->slice_type == HEVC_SLICE_P ||
 642               sh->slice_type == HEVC_SLICE_B)) {
 643             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 644                    sh->slice_type);
 645             return AVERROR_INVALIDDATA;
 646         }
 647         if (IS_IRAP(s) && sh->slice_type != HEVC_SLICE_I) {
 648             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 649             return AVERROR_INVALIDDATA;
 650         }
 651
 652         // when flag is not present, picture is inferred to be output
 653         sh->pic_output_flag = 1;
 654         if (s->ps.pps->output_flag_present_flag)
 655             sh->pic_output_flag = get_bits1(gb);
 656
 657         if (s->ps.sps->separate_colour_plane_flag)
 658             sh->colour_plane_id = get_bits(gb, 2);
 659
 660         if (!IS_IDR(s)) {
 661             int poc, pos;
 662
 663             sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
 664             poc = ff_hevc_compute_poc(s->ps.sps, s->pocTid0, sh->pic_order_cnt_lsb, s->nal_unit_type);
 665             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 666                 av_log(s->avctx, AV_LOG_WARNING,
 667                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 668                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 669                     return AVERROR_INVALIDDATA;
 670                 poc = s->poc;
 671             }
 672             s->poc = poc;
 673
 674             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
 675             pos = get_bits_left(gb);
 676             if (!sh->short_term_ref_pic_set_sps_flag) {
 677                 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
 678                 if (ret < 0)
 679                     return ret;
 680
 681                 sh->short_term_rps = &sh->slice_rps;
 682             } else {
 683                 int numbits, rps_idx;
 684
 685                 if (!s->ps.sps->nb_st_rps) {
 686                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 687                     return AVERROR_INVALIDDATA;
 688                 }
 689
 690                 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
 691                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 692                 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
 693             }
 694             sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
 695
 696             pos = get_bits_left(gb);
 697             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 698             if (ret < 0) {
 699                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 700                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 701                     return AVERROR_INVALIDDATA;
 702             }
 703             sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
 704
 705             if (s->ps.sps->sps_temporal_mvp_enabled_flag)
 706                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 707             else
 708                 sh->slice_temporal_mvp_enabled_flag = 0;
 709         } else {
 710             s->sh.short_term_rps = NULL;
 711             s->poc               = 0;
 712         }
 713
 714         /* 8.3.1 */
 715         if (sh->first_slice_in_pic_flag && s->temporal_id == 0 &&
 716             s->nal_unit_type != HEVC_NAL_TRAIL_N &&
 717             s->nal_unit_type != HEVC_NAL_TSA_N   &&
 718             s->nal_unit_type != HEVC_NAL_STSA_N  &&
 719             s->nal_unit_type != HEVC_NAL_RADL_N  &&
 720             s->nal_unit_type != HEVC_NAL_RADL_R  &&
 721             s->nal_unit_type != HEVC_NAL_RASL_N  &&
 722             s->nal_unit_type != HEVC_NAL_RASL_R)
 723             s->pocTid0 = s->poc;
 724
 725         if (s->ps.sps->sao_enabled) {
 726             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 727             if (s->ps.sps->chroma_format_idc) {
 728                 sh->slice_sample_adaptive_offset_flag[1] =
 729                 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 730             }
 731         } else {
 732             sh->slice_sample_adaptive_offset_flag[0] = 0;
 733             sh->slice_sample_adaptive_offset_flag[1] = 0;
 734             sh->slice_sample_adaptive_offset_flag[2] = 0;
 735         }
 736
 737         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 738         if (sh->slice_type == HEVC_SLICE_P || sh->slice_type == HEVC_SLICE_B) {
 739             int nb_refs;
 740
 741             sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
 742             if (sh->slice_type == HEVC_SLICE_B)
 743                 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
 744
 745             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 746                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 747                 if (sh->slice_type == HEVC_SLICE_B)
 748                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 749             }
 750             if (sh->nb_refs[L0] > HEVC_MAX_REFS || sh->nb_refs[L1] > HEVC_MAX_REFS) {
 751                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 752                        sh->nb_refs[L0], sh->nb_refs[L1]);
 753                 return AVERROR_INVALIDDATA;
 754             }
 755
 756             sh->rpl_modification_flag[0] = 0;
 757             sh->rpl_modification_flag[1] = 0;
 758             nb_refs = ff_hevc_frame_nb_refs(s);
 759             if (!nb_refs) {
 760                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 761                 return AVERROR_INVALIDDATA;
 762             }
 763
 764             if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
 765                 sh->rpl_modification_flag[0] = get_bits1(gb);
 766                 if (sh->rpl_modification_flag[0]) {
 767                     for (i = 0; i < sh->nb_refs[L0]; i++)
 768                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 769                 }
 770
 771                 if (sh->slice_type == HEVC_SLICE_B) {
 772                     sh->rpl_modification_flag[1] = get_bits1(gb);
 773                     if (sh->rpl_modification_flag[1] == 1)
 774                         for (i = 0; i < sh->nb_refs[L1]; i++)
 775                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 776                 }
 777             }
 778
 779             if (sh->slice_type == HEVC_SLICE_B)
 780                 sh->mvd_l1_zero_flag = get_bits1(gb);
 781
 782             if (s->ps.pps->cabac_init_present_flag)
 783                 sh->cabac_init_flag = get_bits1(gb);
 784             else
 785                 sh->cabac_init_flag = 0;
 786
 787             sh->collocated_ref_idx = 0;
 788             if (sh->slice_temporal_mvp_enabled_flag) {
 789                 sh->collocated_list = L0;
 790                 if (sh->slice_type == HEVC_SLICE_B)
 791                     sh->collocated_list = !get_bits1(gb);
 792
 793                 if (sh->nb_refs[sh->collocated_list] > 1) {
 794                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 795                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 796                         av_log(s->avctx, AV_LOG_ERROR,
 797                                "Invalid collocated_ref_idx: %d.\n",
 798                                sh->collocated_ref_idx);
 799                         return AVERROR_INVALIDDATA;
 800                     }
 801                 }
 802             }
 803
 804             if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == HEVC_SLICE_P) ||
 805                 (s->ps.pps->weighted_bipred_flag && sh->slice_type == HEVC_SLICE_B)) {
 806                 int ret = pred_weight_table(s, gb);
 807                 if (ret < 0)
 808                     return ret;
 809             }
 810
 811             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 812             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 813                 av_log(s->avctx, AV_LOG_ERROR,
 814                        "Invalid number of merging MVP candidates: %d.\n",
 815                        sh->max_num_merge_cand);
 816                 return AVERROR_INVALIDDATA;
 817             }
 818         }
 819
 820         sh->slice_qp_delta = get_se_golomb(gb);
 821
 822         if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 823             sh->slice_cb_qp_offset = get_se_golomb(gb);
 824             sh->slice_cr_qp_offset = get_se_golomb(gb);
 825             if (sh->slice_cb_qp_offset < -12 || sh->slice_cb_qp_offset > 12 ||
 826                 sh->slice_cr_qp_offset < -12 || sh->slice_cr_qp_offset > 12) {
 827                 av_log(s->avctx, AV_LOG_ERROR, "Invalid slice cx qp offset.\n");
 828                 return AVERROR_INVALIDDATA;
 829             }
 830         } else {
 831             sh->slice_cb_qp_offset = 0;
 832             sh->slice_cr_qp_offset = 0;
 833         }
 834
 835         if (s->ps.pps->chroma_qp_offset_list_enabled_flag)
 836             sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
 837         else
 838             sh->cu_chroma_qp_offset_enabled_flag = 0;
 839
 840         if (s->ps.pps->deblocking_filter_control_present_flag) {
 841             int deblocking_filter_override_flag = 0;
 842
 843             if (s->ps.pps->deblocking_filter_override_enabled_flag)
 844                 deblocking_filter_override_flag = get_bits1(gb);
 845
 846             if (deblocking_filter_override_flag) {
 847                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 848                 if (!sh->disable_deblocking_filter_flag) {
 849                     int beta_offset_div2 = get_se_golomb(gb);
 850                     int tc_offset_div2   = get_se_golomb(gb) ;
 851                     if (beta_offset_div2 < -6 || beta_offset_div2 > 6 ||
 852                         tc_offset_div2   < -6 || tc_offset_div2   > 6) {
 853                         av_log(s->avctx, AV_LOG_ERROR,
 854                             "Invalid deblock filter offsets: %d, %d\n",
 855                             beta_offset_div2, tc_offset_div2);
 856                         return AVERROR_INVALIDDATA;
 857                     }
 858                     sh->beta_offset = beta_offset_div2 * 2;
 859                     sh->tc_offset   =   tc_offset_div2 * 2;
 860                 }
 861             } else {
 862                 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
 863                 sh->beta_offset                    = s->ps.pps->beta_offset;
 864                 sh->tc_offset                      = s->ps.pps->tc_offset;
 865             }
 866         } else {
 867             sh->disable_deblocking_filter_flag = 0;
 868             sh->beta_offset                    = 0;
 869             sh->tc_offset                      = 0;
 870         }
 871
 872         if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
 873             (sh->slice_sample_adaptive_offset_flag[0] ||
 874              sh->slice_sample_adaptive_offset_flag[1] ||
 875              !sh->disable_deblocking_filter_flag)) {
 876             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 877         } else {
 878             sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
 879         }
 880     } else if (!s->slice_initialized) {
 881         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 882         return AVERROR_INVALIDDATA;
 883     }
 884
 885     sh->num_entry_point_offsets = 0;
 886     if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
 887         unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
 888         // It would be possible to bound this tighter but this here is simpler
 889         if (num_entry_point_offsets > get_bits_left(gb)) {
 890             av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
 891             return AVERROR_INVALIDDATA;
 892         }
 893
 894         sh->num_entry_point_offsets = num_entry_point_offsets;
 895         if (sh->num_entry_point_offsets > 0) {
 896             int offset_len = get_ue_golomb_long(gb) + 1;
 897
 898             if (offset_len < 1 || offset_len > 32) {
 899                 sh->num_entry_point_offsets = 0;
 900                 av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
 901                 return AVERROR_INVALIDDATA;
 902             }
 903
 904             av_freep(&sh->entry_point_offset);
 905             av_freep(&sh->offset);
 906             av_freep(&sh->size);
 907             sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(unsigned));
 908             sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 909             sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 910             if (!sh->entry_point_offset || !sh->offset || !sh->size) {
 911                 sh->num_entry_point_offsets = 0;
 912                 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
 913                 return AVERROR(ENOMEM);
 914             }
 915             for (i = 0; i < sh->num_entry_point_offsets; i++) {
 916                 unsigned val = get_bits_long(gb, offset_len);
 917                 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
 918             }
 919             if (s->threads_number > 1 && (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1)) {
 920                 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
 921                 s->threads_number = 1;
 922             } else
 923                 s->enable_parallel_tiles = 0;
 924         } else
 925             s->enable_parallel_tiles = 0;
 926     }
 927
 928     if (s->ps.pps->slice_header_extension_present_flag) {
 929         unsigned int length = get_ue_golomb_long(gb);
 930         if (length*8LL > get_bits_left(gb)) {
 931             av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
 932             return AVERROR_INVALIDDATA;
 933         }
 934         for (i = 0; i < length; i++)
 935             skip_bits(gb, 8);  // slice_header_extension_data_byte
 936     }
 937
 938     // Inferred parameters
 939     sh->slice_qp = 26U + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 940     if (sh->slice_qp > 51 ||
 941         sh->slice_qp < -s->ps.sps->qp_bd_offset) {
 942         av_log(s->avctx, AV_LOG_ERROR,
 943                "The slice_qp %d is outside the valid range "
 944                "[%d, 51].\n",
 945                sh->slice_qp,
 946                -s->ps.sps->qp_bd_offset);
 947         return AVERROR_INVALIDDATA;
 948     }
 949
 950     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 951
 952     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
 953         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
 954         return AVERROR_INVALIDDATA;
 955     }
 956
 957     if (get_bits_left(gb) < 0) {
 958         av_log(s->avctx, AV_LOG_ERROR,
 959                "Overread slice header by %d bits\n", -get_bits_left(gb));
 960         return AVERROR_INVALIDDATA;
 961     }
 962
 963     s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
 964
 965     if (!s->ps.pps->cu_qp_delta_enabled_flag)
 966         s->HEVClc->qp_y = s->sh.slice_qp;
 967
 968     s->slice_initialized = 1;
 969     s->HEVClc->tu.cu_qp_offset_cb = 0;
 970     s->HEVClc->tu.cu_qp_offset_cr = 0;
 971
 972     return 0;
 973 }
 974
 975 #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
 976
 977 #define SET_SAO(elem, value)                            \
 978 do {                                                    \
 979     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 980         sao->elem = value;                              \
 981     else if (sao_merge_left_flag)                       \
 982         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 983     else if (sao_merge_up_flag)                         \
 984         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 985     else                                                \
 986         sao->elem = 0;                                  \
 987 } while (0)
 988
 989 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 990 {
 991     HEVCLocalContext *lc    = s->HEVClc;
 992     int sao_merge_left_flag = 0;
 993     int sao_merge_up_flag   = 0;
 994     SAOParams *sao          = &CTB(s->sao, rx, ry);
 995     int c_idx, i;
 996
 997     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 998         s->sh.slice_sample_adaptive_offset_flag[1]) {
 999         if (rx > 0) {
1000             if (lc->ctb_left_flag)
1001                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
1002         }
1003         if (ry > 0 && !sao_merge_left_flag) {
1004             if (lc->ctb_up_flag)
1005                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
1006         }
1007     }
1008
1009     for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
1010         int log2_sao_offset_scale = c_idx == 0 ? s->ps.pps->log2_sao_offset_scale_luma :
1011                                                  s->ps.pps->log2_sao_offset_scale_chroma;
1012
1013         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
1014             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
1015             continue;
1016         }
1017
1018         if (c_idx == 2) {
1019             sao->type_idx[2] = sao->type_idx[1];
1020             sao->eo_class[2] = sao->eo_class[1];
1021         } else {
1022             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
1023         }
1024
1025         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
1026             continue;
1027
1028         for (i = 0; i < 4; i++)
1029             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
1030
1031         if (sao->type_idx[c_idx] == SAO_BAND) {
1032             for (i = 0; i < 4; i++) {
1033                 if (sao->offset_abs[c_idx][i]) {
1034                     SET_SAO(offset_sign[c_idx][i],
1035                             ff_hevc_sao_offset_sign_decode(s));
1036                 } else {
1037                     sao->offset_sign[c_idx][i] = 0;
1038                 }
1039             }
1040             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
1041         } else if (c_idx != 2) {
1042             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
1043         }
1044
1045         // Inferred parameters
1046         sao->offset_val[c_idx][0] = 0;
1047         for (i = 0; i < 4; i++) {
1048             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
1049             if (sao->type_idx[c_idx] == SAO_EDGE) {
1050                 if (i > 1)
1051                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
1052             } else if (sao->offset_sign[c_idx][i]) {
1053                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
1054             }
1055             sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale;
1056         }
1057     }
1058 }
1059
1060 #undef SET_SAO
1061 #undef CTB
1062
1063 static int hls_cross_component_pred(HEVCContext *s, int idx) {
1064     HEVCLocalContext *lc    = s->HEVClc;
1065     int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
1066
1067     if (log2_res_scale_abs_plus1 !=  0) {
1068         int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
1069         lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
1070                                (1 - 2 * res_scale_sign_flag);
1071     } else {
1072         lc->tu.res_scale_val = 0;
1073     }
1074
1075
1076     return 0;
1077 }
1078
1079 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1080                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1081                               int log2_cb_size, int log2_trafo_size,
1082                               int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
1083 {
1084     HEVCLocalContext *lc = s->HEVClc;
1085     const int log2_trafo_size_c = log2_trafo_size - s->ps.sps->hshift[1];
1086     int i;
1087
1088     if (lc->cu.pred_mode == MODE_INTRA) {
1089         int trafo_size = 1 << log2_trafo_size;
1090         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1091
1092         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1093     }
1094
1095     if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
1096         (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1097         int scan_idx   = SCAN_DIAG;
1098         int scan_idx_c = SCAN_DIAG;
1099         int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
1100                          (s->ps.sps->chroma_format_idc == 2 &&
1101                          (cbf_cb[1] || cbf_cr[1]));
1102
1103         if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1104             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1105             if (lc->tu.cu_qp_delta != 0)
1106                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1107                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1108             lc->tu.is_cu_qp_delta_coded = 1;
1109
1110             if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
1111                 lc->tu.cu_qp_delta >  (25 + s->ps.sps->qp_bd_offset / 2)) {
1112                 av_log(s->avctx, AV_LOG_ERROR,
1113                        "The cu_qp_delta %d is outside the valid range "
1114                        "[%d, %d].\n",
1115                        lc->tu.cu_qp_delta,
1116                        -(26 + s->ps.sps->qp_bd_offset / 2),
1117                         (25 + s->ps.sps->qp_bd_offset / 2));
1118                 return AVERROR_INVALIDDATA;
1119             }
1120
1121             ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
1122         }
1123
1124         if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
1125             !lc->cu.cu_transquant_bypass_flag  &&  !lc->tu.is_cu_chroma_qp_offset_coded) {
1126             int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
1127             if (cu_chroma_qp_offset_flag) {
1128                 int cu_chroma_qp_offset_idx  = 0;
1129                 if (s->ps.pps->chroma_qp_offset_list_len_minus1 > 0) {
1130                     cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
1131                     av_log(s->avctx, AV_LOG_ERROR,
1132                         "cu_chroma_qp_offset_idx not yet tested.\n");
1133                 }
1134                 lc->tu.cu_qp_offset_cb = s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
1135                 lc->tu.cu_qp_offset_cr = s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
1136             } else {
1137                 lc->tu.cu_qp_offset_cb = 0;
1138                 lc->tu.cu_qp_offset_cr = 0;
1139             }
1140             lc->tu.is_cu_chroma_qp_offset_coded = 1;
1141         }
1142
1143         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1144             if (lc->tu.intra_pred_mode >= 6 &&
1145                 lc->tu.intra_pred_mode <= 14) {
1146                 scan_idx = SCAN_VERT;
1147             } else if (lc->tu.intra_pred_mode >= 22 &&
1148                        lc->tu.intra_pred_mode <= 30) {
1149                 scan_idx = SCAN_HORIZ;
1150             }
1151
1152             if (lc->tu.intra_pred_mode_c >=  6 &&
1153                 lc->tu.intra_pred_mode_c <= 14) {
1154                 scan_idx_c = SCAN_VERT;
1155             } else if (lc->tu.intra_pred_mode_c >= 22 &&
1156                        lc->tu.intra_pred_mode_c <= 30) {
1157                 scan_idx_c = SCAN_HORIZ;
1158             }
1159         }
1160
1161         lc->tu.cross_pf = 0;
1162
1163         if (cbf_luma)
1164             ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1165         if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1166             int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1167             int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1168             lc->tu.cross_pf  = (s->ps.pps->cross_component_prediction_enabled_flag && cbf_luma &&
1169                                 (lc->cu.pred_mode == MODE_INTER ||
1170                                  (lc->tu.chroma_mode_c ==  4)));
1171
1172             if (lc->tu.cross_pf) {
1173                 hls_cross_component_pred(s, 0);
1174             }
1175             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1176                 if (lc->cu.pred_mode == MODE_INTRA) {
1177                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1178                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
1179                 }
1180                 if (cbf_cb[i])
1181                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1182                                                 log2_trafo_size_c, scan_idx_c, 1);
1183                 else
1184                     if (lc->tu.cross_pf) {
1185                         ptrdiff_t stride = s->frame->linesize[1];
1186                         int hshift = s->ps.sps->hshift[1];
1187                         int vshift = s->ps.sps->vshift[1];
1188                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1189                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1190                         int size = 1 << log2_trafo_size_c;
1191
1192                         uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1193                                                               ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1194                         for (i = 0; i < (size * size); i++) {
1195                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1196                         }
1197                         s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
1198                     }
1199             }
1200
1201             if (lc->tu.cross_pf) {
1202                 hls_cross_component_pred(s, 1);
1203             }
1204             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1205                 if (lc->cu.pred_mode == MODE_INTRA) {
1206                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1207                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1208                 }
1209                 if (cbf_cr[i])
1210                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1211                                                 log2_trafo_size_c, scan_idx_c, 2);
1212                 else
1213                     if (lc->tu.cross_pf) {
1214                         ptrdiff_t stride = s->frame->linesize[2];
1215                         int hshift = s->ps.sps->hshift[2];
1216                         int vshift = s->ps.sps->vshift[2];
1217                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1218                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1219                         int size = 1 << log2_trafo_size_c;
1220
1221                         uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1222                                                           ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1223                         for (i = 0; i < (size * size); i++) {
1224                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1225                         }
1226                         s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
1227                     }
1228             }
1229         } else if (s->ps.sps->chroma_format_idc && blk_idx == 3) {
1230             int trafo_size_h = 1 << (log2_trafo_size + 1);
1231             int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1232             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1233                 if (lc->cu.pred_mode == MODE_INTRA) {
1234                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1235                                                     trafo_size_h, trafo_size_v);
1236                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1237                 }
1238                 if (cbf_cb[i])
1239                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1240                                                 log2_trafo_size, scan_idx_c, 1);
1241             }
1242             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1243                 if (lc->cu.pred_mode == MODE_INTRA) {
1244                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1245                                                 trafo_size_h, trafo_size_v);
1246                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1247                 }
1248                 if (cbf_cr[i])
1249                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1250                                                 log2_trafo_size, scan_idx_c, 2);
1251             }
1252         }
1253     } else if (s->ps.sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
1254         if (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3) {
1255             int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1256             int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1257             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1258             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1259             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1260             if (s->ps.sps->chroma_format_idc == 2) {
1261                 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1262                                                 trafo_size_h, trafo_size_v);
1263                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1264                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1265             }
1266         } else if (blk_idx == 3) {
1267             int trafo_size_h = 1 << (log2_trafo_size + 1);
1268             int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1269             ff_hevc_set_neighbour_available(s, xBase, yBase,
1270                                             trafo_size_h, trafo_size_v);
1271             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1272             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1273             if (s->ps.sps->chroma_format_idc == 2) {
1274                 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1275                                                 trafo_size_h, trafo_size_v);
1276                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1277                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
1278             }
1279         }
1280     }
1281
1282     return 0;
1283 }
1284
1285 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1286 {
1287     int cb_size          = 1 << log2_cb_size;
1288     int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1289
1290     int min_pu_width     = s->ps.sps->min_pu_width;
1291     int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1292     int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1293     int i, j;
1294
1295     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1296         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1297             s->is_pcm[i + j * min_pu_width] = 2;
1298 }
1299
1300 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1301                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1302                               int log2_cb_size, int log2_trafo_size,
1303                               int trafo_depth, int blk_idx,
1304                               const int *base_cbf_cb, const int *base_cbf_cr)
1305 {
1306     HEVCLocalContext *lc = s->HEVClc;
1307     uint8_t split_transform_flag;
1308     int cbf_cb[2];
1309     int cbf_cr[2];
1310     int ret;
1311
1312     cbf_cb[0] = base_cbf_cb[0];
1313     cbf_cb[1] = base_cbf_cb[1];
1314     cbf_cr[0] = base_cbf_cr[0];
1315     cbf_cr[1] = base_cbf_cr[1];
1316
1317     if (lc->cu.intra_split_flag) {
1318         if (trafo_depth == 1) {
1319             lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[blk_idx];
1320             if (s->ps.sps->chroma_format_idc == 3) {
1321                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1322                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[blk_idx];
1323             } else {
1324                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1325                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1326             }
1327         }
1328     } else {
1329         lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[0];
1330         lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1331         lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1332     }
1333
1334     if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1335         log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
1336         trafo_depth     < lc->cu.max_trafo_depth       &&
1337         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1338         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1339     } else {
1340         int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1341                           lc->cu.pred_mode == MODE_INTER &&
1342                           lc->cu.part_mode != PART_2Nx2N &&
1343                           trafo_depth == 0;
1344
1345         split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1346                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1347                                inter_split;
1348     }
1349
1350     if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1351         if (trafo_depth == 0 || cbf_cb[0]) {
1352             cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1353             if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1354                 cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1355             }
1356         }
1357
1358         if (trafo_depth == 0 || cbf_cr[0]) {
1359             cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1360             if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1361                 cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1362             }
1363         }
1364     }
1365
1366     if (split_transform_flag) {
1367         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1368         const int x1 = x0 + trafo_size_split;
1369         const int y1 = y0 + trafo_size_split;
1370
1371 #define SUBDIVIDE(x, y, idx)                                                    \
1372 do {                                                                            \
1373     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1374                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1375                              cbf_cb, cbf_cr);                                   \
1376     if (ret < 0)                                                                \
1377         return ret;                                                             \
1378 } while (0)
1379
1380         SUBDIVIDE(x0, y0, 0);
1381         SUBDIVIDE(x1, y0, 1);
1382         SUBDIVIDE(x0, y1, 2);
1383         SUBDIVIDE(x1, y1, 3);
1384
1385 #undef SUBDIVIDE
1386     } else {
1387         int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
1388         int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1389         int min_tu_width     = s->ps.sps->min_tb_width;
1390         int cbf_luma         = 1;
1391
1392         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1393             cbf_cb[0] || cbf_cr[0] ||
1394             (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1395             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1396         }
1397
1398         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1399                                  log2_cb_size, log2_trafo_size,
1400                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1401         if (ret < 0)
1402             return ret;
1403         // TODO: store cbf_luma somewhere else
1404         if (cbf_luma) {
1405             int i, j;
1406             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1407                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1408                     int x_tu = (x0 + j) >> log2_min_tu_size;
1409                     int y_tu = (y0 + i) >> log2_min_tu_size;
1410                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1411                 }
1412         }
1413         if (!s->sh.disable_deblocking_filter_flag) {
1414             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1415             if (s->ps.pps->transquant_bypass_enable_flag &&
1416                 lc->cu.cu_transquant_bypass_flag)
1417                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1418         }
1419     }
1420     return 0;
1421 }
1422
1423 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1424 {
1425     HEVCLocalContext *lc = s->HEVClc;
1426     GetBitContext gb;
1427     int cb_size   = 1 << log2_cb_size;
1428     ptrdiff_t stride0 = s->frame->linesize[0];
1429     ptrdiff_t stride1 = s->frame->linesize[1];
1430     ptrdiff_t stride2 = s->frame->linesize[2];
1431     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1432     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1433     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
1434
1435     int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth +
1436                          (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) +
1437                           ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) *
1438                           s->ps.sps->pcm.bit_depth_chroma;
1439     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1440     int ret;
1441
1442     if (!s->sh.disable_deblocking_filter_flag)
1443         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1444
1445     ret = init_get_bits(&gb, pcm, length);
1446     if (ret < 0)
1447         return ret;
1448
1449     s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size,     &gb, s->ps.sps->pcm.bit_depth);
1450     if (s->ps.sps->chroma_format_idc) {
1451         s->hevcdsp.put_pcm(dst1, stride1,
1452                            cb_size >> s->ps.sps->hshift[1],
1453                            cb_size >> s->ps.sps->vshift[1],
1454                            &gb, s->ps.sps->pcm.bit_depth_chroma);
1455         s->hevcdsp.put_pcm(dst2, stride2,
1456                            cb_size >> s->ps.sps->hshift[2],
1457                            cb_size >> s->ps.sps->vshift[2],
1458                            &gb, s->ps.sps->pcm.bit_depth_chroma);
1459     }
1460
1461     return 0;
1462 }
1463
1464 /**
1465  * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1466  *
1467  * @param s HEVC decoding context
1468  * @param dst target buffer for block data at block position
1469  * @param dststride stride of the dst buffer
1470  * @param ref reference picture buffer at origin (0, 0)
1471  * @param mv motion vector (relative to block position) to get pixel data from
1472  * @param x_off horizontal position of block from origin (0, 0)
1473  * @param y_off vertical position of block from origin (0, 0)
1474  * @param block_w width of block
1475  * @param block_h height of block
1476  * @param luma_weight weighting factor applied to the luma prediction
1477  * @param luma_offset additive offset applied to the luma prediction value
1478  */
1479
1480 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1481                         AVFrame *ref, const Mv *mv, int x_off, int y_off,
1482                         int block_w, int block_h, int luma_weight, int luma_offset)
1483 {
1484     HEVCLocalContext *lc = s->HEVClc;
1485     uint8_t *src         = ref->data[0];
1486     ptrdiff_t srcstride  = ref->linesize[0];
1487     int pic_width        = s->ps.sps->width;
1488     int pic_height       = s->ps.sps->height;
1489     int mx               = mv->x & 3;
1490     int my               = mv->y & 3;
1491     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1492                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1493     int idx              = ff_hevc_pel_weight[block_w];
1494
1495     x_off += mv->x >> 2;
1496     y_off += mv->y >> 2;
1497     src   += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1498
1499     if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1500         x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1501         y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1502         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1503         int offset     = QPEL_EXTRA_BEFORE * srcstride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1504         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1505
1506         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1507                                  edge_emu_stride, srcstride,
1508                                  block_w + QPEL_EXTRA,
1509                                  block_h + QPEL_EXTRA,
1510                                  x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1511                                  pic_width, pic_height);
1512         src = lc->edge_emu_buffer + buf_offset;
1513         srcstride = edge_emu_stride;
1514     }
1515
1516     if (!weight_flag)
1517         s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1518                                                       block_h, mx, my, block_w);
1519     else
1520         s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1521                                                         block_h, s->sh.luma_log2_weight_denom,
1522                                                         luma_weight, luma_offset, mx, my, block_w);
1523 }
1524
1525 /**
1526  * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1527  *
1528  * @param s HEVC decoding context
1529  * @param dst target buffer for block data at block position
1530  * @param dststride stride of the dst buffer
1531  * @param ref0 reference picture0 buffer at origin (0, 0)
1532  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1533  * @param x_off horizontal position of block from origin (0, 0)
1534  * @param y_off vertical position of block from origin (0, 0)
1535  * @param block_w width of block
1536  * @param block_h height of block
1537  * @param ref1 reference picture1 buffer at origin (0, 0)
1538  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1539  * @param current_mv current motion vector structure
1540  */
1541  static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1542                        AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1543                        int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1544 {
1545     HEVCLocalContext *lc = s->HEVClc;
1546     ptrdiff_t src0stride  = ref0->linesize[0];
1547     ptrdiff_t src1stride  = ref1->linesize[0];
1548     int pic_width        = s->ps.sps->width;
1549     int pic_height       = s->ps.sps->height;
1550     int mx0              = mv0->x & 3;
1551     int my0              = mv0->y & 3;
1552     int mx1              = mv1->x & 3;
1553     int my1              = mv1->y & 3;
1554     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1555                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1556     int x_off0           = x_off + (mv0->x >> 2);
1557     int y_off0           = y_off + (mv0->y >> 2);
1558     int x_off1           = x_off + (mv1->x >> 2);
1559     int y_off1           = y_off + (mv1->y >> 2);
1560     int idx              = ff_hevc_pel_weight[block_w];
1561
1562     uint8_t *src0  = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1563     uint8_t *src1  = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1564
1565     if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1566         x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1567         y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1568         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1569         int offset     = QPEL_EXTRA_BEFORE * src0stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1570         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1571
1572         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1573                                  edge_emu_stride, src0stride,
1574                                  block_w + QPEL_EXTRA,
1575                                  block_h + QPEL_EXTRA,
1576                                  x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1577                                  pic_width, pic_height);
1578         src0 = lc->edge_emu_buffer + buf_offset;
1579         src0stride = edge_emu_stride;
1580     }
1581
1582     if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1583         x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1584         y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1585         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1586         int offset     = QPEL_EXTRA_BEFORE * src1stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1587         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1588
1589         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1590                                  edge_emu_stride, src1stride,
1591                                  block_w + QPEL_EXTRA,
1592                                  block_h + QPEL_EXTRA,
1593                                  x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1594                                  pic_width, pic_height);
1595         src1 = lc->edge_emu_buffer2 + buf_offset;
1596         src1stride = edge_emu_stride;
1597     }
1598
1599     s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
1600                                                 block_h, mx0, my0, block_w);
1601     if (!weight_flag)
1602         s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1603                                                        block_h, mx1, my1, block_w);
1604     else
1605         s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1606                                                          block_h, s->sh.luma_log2_weight_denom,
1607                                                          s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1608                                                          s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1609                                                          s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1610                                                          s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1611                                                          mx1, my1, block_w);
1612
1613 }
1614
1615 /**
1616  * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1617  *
1618  * @param s HEVC decoding context
1619  * @param dst1 target buffer for block data at block position (U plane)
1620  * @param dst2 target buffer for block data at block position (V plane)
1621  * @param dststride stride of the dst1 and dst2 buffers
1622  * @param ref reference picture buffer at origin (0, 0)
1623  * @param mv motion vector (relative to block position) to get pixel data from
1624  * @param x_off horizontal position of block from origin (0, 0)
1625  * @param y_off vertical position of block from origin (0, 0)
1626  * @param block_w width of block
1627  * @param block_h height of block
1628  * @param chroma_weight weighting factor applied to the chroma prediction
1629  * @param chroma_offset additive offset applied to the chroma prediction value
1630  */
1631
1632 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1633                           ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1634                           int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1635 {
1636     HEVCLocalContext *lc = s->HEVClc;
1637     int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
1638     int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
1639     const Mv *mv         = &current_mv->mv[reflist];
1640     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1641                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1642     int idx              = ff_hevc_pel_weight[block_w];
1643     int hshift           = s->ps.sps->hshift[1];
1644     int vshift           = s->ps.sps->vshift[1];
1645     intptr_t mx          = av_mod_uintp2(mv->x, 2 + hshift);
1646     intptr_t my          = av_mod_uintp2(mv->y, 2 + vshift);
1647     intptr_t _mx         = mx << (1 - hshift);
1648     intptr_t _my         = my << (1 - vshift);
1649
1650     x_off += mv->x >> (2 + hshift);
1651     y_off += mv->y >> (2 + vshift);
1652     src0  += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1653
1654     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1655         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1656         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1657         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1658         int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->ps.sps->pixel_shift));
1659         int buf_offset0 = EPEL_EXTRA_BEFORE *
1660                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1661         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1662                                  edge_emu_stride, srcstride,
1663                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1664                                  x_off - EPEL_EXTRA_BEFORE,
1665                                  y_off - EPEL_EXTRA_BEFORE,
1666                                  pic_width, pic_height);
1667
1668         src0 = lc->edge_emu_buffer + buf_offset0;
1669         srcstride = edge_emu_stride;
1670     }
1671     if (!weight_flag)
1672         s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1673                                                   block_h, _mx, _my, block_w);
1674     else
1675         s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1676                                                         block_h, s->sh.chroma_log2_weight_denom,
1677                                                         chroma_weight, chroma_offset, _mx, _my, block_w);
1678 }
1679
1680 /**
1681  * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1682  *
1683  * @param s HEVC decoding context
1684  * @param dst target buffer for block data at block position
1685  * @param dststride stride of the dst buffer
1686  * @param ref0 reference picture0 buffer at origin (0, 0)
1687  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1688  * @param x_off horizontal position of block from origin (0, 0)
1689  * @param y_off vertical position of block from origin (0, 0)
1690  * @param block_w width of block
1691  * @param block_h height of block
1692  * @param ref1 reference picture1 buffer at origin (0, 0)
1693  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1694  * @param current_mv current motion vector structure
1695  * @param cidx chroma component(cb, cr)
1696  */
1697 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1698                          int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1699 {
1700     HEVCLocalContext *lc = s->HEVClc;
1701     uint8_t *src1        = ref0->data[cidx+1];
1702     uint8_t *src2        = ref1->data[cidx+1];
1703     ptrdiff_t src1stride = ref0->linesize[cidx+1];
1704     ptrdiff_t src2stride = ref1->linesize[cidx+1];
1705     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1706                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1707     int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
1708     int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
1709     Mv *mv0              = &current_mv->mv[0];
1710     Mv *mv1              = &current_mv->mv[1];
1711     int hshift = s->ps.sps->hshift[1];
1712     int vshift = s->ps.sps->vshift[1];
1713
1714     intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift);
1715     intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift);
1716     intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift);
1717     intptr_t my1 = av_mod_uintp2(mv1->y, 2 + vshift);
1718     intptr_t _mx0 = mx0 << (1 - hshift);
1719     intptr_t _my0 = my0 << (1 - vshift);
1720     intptr_t _mx1 = mx1 << (1 - hshift);
1721     intptr_t _my1 = my1 << (1 - vshift);
1722
1723     int x_off0 = x_off + (mv0->x >> (2 + hshift));
1724     int y_off0 = y_off + (mv0->y >> (2 + vshift));
1725     int x_off1 = x_off + (mv1->x >> (2 + hshift));
1726     int y_off1 = y_off + (mv1->y >> (2 + vshift));
1727     int idx = ff_hevc_pel_weight[block_w];
1728     src1  += y_off0 * src1stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1729     src2  += y_off1 * src2stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1730
1731     if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1732         x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1733         y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1734         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1735         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1736         int buf_offset1 = EPEL_EXTRA_BEFORE *
1737                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1738
1739         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1740                                  edge_emu_stride, src1stride,
1741                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1742                                  x_off0 - EPEL_EXTRA_BEFORE,
1743                                  y_off0 - EPEL_EXTRA_BEFORE,
1744                                  pic_width, pic_height);
1745
1746         src1 = lc->edge_emu_buffer + buf_offset1;
1747         src1stride = edge_emu_stride;
1748     }
1749
1750     if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1751         x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1752         y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1753         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1754         int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1755         int buf_offset1 = EPEL_EXTRA_BEFORE *
1756                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1757
1758         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1759                                  edge_emu_stride, src2stride,
1760                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1761                                  x_off1 - EPEL_EXTRA_BEFORE,
1762                                  y_off1 - EPEL_EXTRA_BEFORE,
1763                                  pic_width, pic_height);
1764
1765         src2 = lc->edge_emu_buffer2 + buf_offset1;
1766         src2stride = edge_emu_stride;
1767     }
1768
1769     s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1770                                                 block_h, _mx0, _my0, block_w);
1771     if (!weight_flag)
1772         s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1773                                                        src2, src2stride, lc->tmp,
1774                                                        block_h, _mx1, _my1, block_w);
1775     else
1776         s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1777                                                          src2, src2stride, lc->tmp,
1778                                                          block_h,
1779                                                          s->sh.chroma_log2_weight_denom,
1780                                                          s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1781                                                          s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1782                                                          s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1783                                                          s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1784                                                          _mx1, _my1, block_w);
1785 }
1786
1787 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1788                                 const Mv *mv, int y0, int height)
1789 {
1790     if (s->threads_type == FF_THREAD_FRAME ) {
1791         int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1792
1793         ff_thread_await_progress(&ref->tf, y, 0);
1794     }
1795 }
1796
1797 static void hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
1798                                   int nPbH, int log2_cb_size, int part_idx,
1799                                   int merge_idx, MvField *mv)
1800 {
1801     HEVCLocalContext *lc = s->HEVClc;
1802     enum InterPredIdc inter_pred_idc = PRED_L0;
1803     int mvp_flag;
1804
1805     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1806     mv->pred_flag = 0;
1807     if (s->sh.slice_type == HEVC_SLICE_B)
1808         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1809
1810     if (inter_pred_idc != PRED_L1) {
1811         if (s->sh.nb_refs[L0])
1812             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1813
1814         mv->pred_flag = PF_L0;
1815         ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1816         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1817         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1818                                  part_idx, merge_idx, mv, mvp_flag, 0);
1819         mv->mv[0].x += lc->pu.mvd.x;
1820         mv->mv[0].y += lc->pu.mvd.y;
1821     }
1822
1823     if (inter_pred_idc != PRED_L0) {
1824         if (s->sh.nb_refs[L1])
1825             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1826
1827         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1828             AV_ZERO32(&lc->pu.mvd);
1829         } else {
1830             ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1831         }
1832
1833         mv->pred_flag += PF_L1;
1834         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1835         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1836                                  part_idx, merge_idx, mv, mvp_flag, 1);
1837         mv->mv[1].x += lc->pu.mvd.x;
1838         mv->mv[1].y += lc->pu.mvd.y;
1839     }
1840 }
1841
1842 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1843                                 int nPbW, int nPbH,
1844                                 int log2_cb_size, int partIdx, int idx)
1845 {
1846 #define POS(c_idx, x, y)                                                              \
1847     &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1848                            (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1849     HEVCLocalContext *lc = s->HEVClc;
1850     int merge_idx = 0;
1851     struct MvField current_mv = {{{ 0 }}};
1852
1853     int min_pu_width = s->ps.sps->min_pu_width;
1854
1855     MvField *tab_mvf = s->ref->tab_mvf;
1856     RefPicList  *refPicList = s->ref->refPicList;
1857     HEVCFrame *ref0 = NULL, *ref1 = NULL;
1858     uint8_t *dst0 = POS(0, x0, y0);
1859     uint8_t *dst1 = POS(1, x0, y0);
1860     uint8_t *dst2 = POS(2, x0, y0);
1861     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1862     int min_cb_width     = s->ps.sps->min_cb_width;
1863     int x_cb             = x0 >> log2_min_cb_size;
1864     int y_cb             = y0 >> log2_min_cb_size;
1865     int x_pu, y_pu;
1866     int i, j;
1867
1868     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1869
1870     if (!skip_flag)
1871         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1872
1873     if (skip_flag || lc->pu.merge_flag) {
1874         if (s->sh.max_num_merge_cand > 1)
1875             merge_idx = ff_hevc_merge_idx_decode(s);
1876         else
1877             merge_idx = 0;
1878
1879         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1880                                    partIdx, merge_idx, &current_mv);
1881     } else {
1882         hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1883                               partIdx, merge_idx, &current_mv);
1884     }
1885
1886     x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1887     y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1888
1889     for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1890         for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1891             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1892
1893     if (current_mv.pred_flag & PF_L0) {
1894         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1895         if (!ref0)
1896             return;
1897         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1898     }
1899     if (current_mv.pred_flag & PF_L1) {
1900         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1901         if (!ref1)
1902             return;
1903         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1904     }
1905
1906     if (current_mv.pred_flag == PF_L0) {
1907         int x0_c = x0 >> s->ps.sps->hshift[1];
1908         int y0_c = y0 >> s->ps.sps->vshift[1];
1909         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1910         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1911
1912         luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1913                     &current_mv.mv[0], x0, y0, nPbW, nPbH,
1914                     s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1915                     s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1916
1917         if (s->ps.sps->chroma_format_idc) {
1918             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1919                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1920                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1921             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1922                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1923                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1924         }
1925     } else if (current_mv.pred_flag == PF_L1) {
1926         int x0_c = x0 >> s->ps.sps->hshift[1];
1927         int y0_c = y0 >> s->ps.sps->vshift[1];
1928         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1929         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1930
1931         luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1932                     &current_mv.mv[1], x0, y0, nPbW, nPbH,
1933                     s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1934                     s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1935
1936         if (s->ps.sps->chroma_format_idc) {
1937             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1938                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1939                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1940
1941             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1942                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1943                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1944         }
1945     } else if (current_mv.pred_flag == PF_BI) {
1946         int x0_c = x0 >> s->ps.sps->hshift[1];
1947         int y0_c = y0 >> s->ps.sps->vshift[1];
1948         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1949         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1950
1951         luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1952                    &current_mv.mv[0], x0, y0, nPbW, nPbH,
1953                    ref1->frame, &current_mv.mv[1], &current_mv);
1954
1955         if (s->ps.sps->chroma_format_idc) {
1956             chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1957                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0);
1958
1959             chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1960                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 1);
1961         }
1962     }
1963 }
1964
1965 /**
1966  * 8.4.1
1967  */
1968 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1969                                 int prev_intra_luma_pred_flag)
1970 {
1971     HEVCLocalContext *lc = s->HEVClc;
1972     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
1973     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
1974     int min_pu_width     = s->ps.sps->min_pu_width;
1975     int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
1976     int x0b              = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
1977     int y0b              = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
1978
1979     int cand_up   = (lc->ctb_up_flag || y0b) ?
1980                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1981     int cand_left = (lc->ctb_left_flag || x0b) ?
1982                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1983
1984     int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1985
1986     MvField *tab_mvf = s->ref->tab_mvf;
1987     int intra_pred_mode;
1988     int candidate[3];
1989     int i, j;
1990
1991     // intra_pred_mode prediction does not cross vertical CTB boundaries
1992     if ((y0 - 1) < y_ctb)
1993         cand_up = INTRA_DC;
1994
1995     if (cand_left == cand_up) {
1996         if (cand_left < 2) {
1997             candidate[0] = INTRA_PLANAR;
1998             candidate[1] = INTRA_DC;
1999             candidate[2] = INTRA_ANGULAR_26;
2000         } else {
2001             candidate[0] = cand_left;
2002             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
2003             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
2004         }
2005     } else {
2006         candidate[0] = cand_left;
2007         candidate[1] = cand_up;
2008         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
2009             candidate[2] = INTRA_PLANAR;
2010         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
2011             candidate[2] = INTRA_DC;
2012         } else {
2013             candidate[2] = INTRA_ANGULAR_26;
2014         }
2015     }
2016
2017     if (prev_intra_luma_pred_flag) {
2018         intra_pred_mode = candidate[lc->pu.mpm_idx];
2019     } else {
2020         if (candidate[0] > candidate[1])
2021             FFSWAP(uint8_t, candidate[0], candidate[1]);
2022         if (candidate[0] > candidate[2])
2023             FFSWAP(uint8_t, candidate[0], candidate[2]);
2024         if (candidate[1] > candidate[2])
2025             FFSWAP(uint8_t, candidate[1], candidate[2]);
2026
2027         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
2028         for (i = 0; i < 3; i++)
2029             if (intra_pred_mode >= candidate[i])
2030                 intra_pred_mode++;
2031     }
2032
2033     /* write the intra prediction units into the mv array */
2034     if (!size_in_pus)
2035         size_in_pus = 1;
2036     for (i = 0; i < size_in_pus; i++) {
2037         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
2038                intra_pred_mode, size_in_pus);
2039
2040         for (j = 0; j < size_in_pus; j++) {
2041             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
2042         }
2043     }
2044
2045     return intra_pred_mode;
2046 }
2047
2048 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
2049                                           int log2_cb_size, int ct_depth)
2050 {
2051     int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
2052     int x_cb   = x0 >> s->ps.sps->log2_min_cb_size;
2053     int y_cb   = y0 >> s->ps.sps->log2_min_cb_size;
2054     int y;
2055
2056     for (y = 0; y < length; y++)
2057         memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
2058                ct_depth, length);
2059 }
2060
2061 static const uint8_t tab_mode_idx[] = {
2062      0,  1,  2,  2,  2,  2,  3,  5,  7,  8, 10, 12, 13, 15, 17, 18, 19, 20,
2063     21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31};
2064
2065 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
2066                                   int log2_cb_size)
2067 {
2068     HEVCLocalContext *lc = s->HEVClc;
2069     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
2070     uint8_t prev_intra_luma_pred_flag[4];
2071     int split   = lc->cu.part_mode == PART_NxN;
2072     int pb_size = (1 << log2_cb_size) >> split;
2073     int side    = split + 1;
2074     int chroma_mode;
2075     int i, j;
2076
2077     for (i = 0; i < side; i++)
2078         for (j = 0; j < side; j++)
2079             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
2080
2081     for (i = 0; i < side; i++) {
2082         for (j = 0; j < side; j++) {
2083             if (prev_intra_luma_pred_flag[2 * i + j])
2084                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2085             else
2086                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2087
2088             lc->pu.intra_pred_mode[2 * i + j] =
2089                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2090                                      prev_intra_luma_pred_flag[2 * i + j]);
2091         }
2092     }
2093
2094     if (s->ps.sps->chroma_format_idc == 3) {
2095         for (i = 0; i < side; i++) {
2096             for (j = 0; j < side; j++) {
2097                 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2098                 if (chroma_mode != 4) {
2099                     if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
2100                         lc->pu.intra_pred_mode_c[2 * i + j] = 34;
2101                     else
2102                         lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
2103                 } else {
2104                     lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
2105                 }
2106             }
2107         }
2108     } else if (s->ps.sps->chroma_format_idc == 2) {
2109         int mode_idx;
2110         lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2111         if (chroma_mode != 4) {
2112             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2113                 mode_idx = 34;
2114             else
2115                 mode_idx = intra_chroma_table[chroma_mode];
2116         } else {
2117             mode_idx = lc->pu.intra_pred_mode[0];
2118         }
2119         lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
2120     } else if (s->ps.sps->chroma_format_idc != 0) {
2121         chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2122         if (chroma_mode != 4) {
2123             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2124                 lc->pu.intra_pred_mode_c[0] = 34;
2125             else
2126                 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
2127         } else {
2128             lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
2129         }
2130     }
2131 }
2132
2133 static void intra_prediction_unit_default_value(HEVCContext *s,
2134                                                 int x0, int y0,
2135                                                 int log2_cb_size)
2136 {
2137     HEVCLocalContext *lc = s->HEVClc;
2138     int pb_size          = 1 << log2_cb_size;
2139     int size_in_pus      = pb_size >> s->ps.sps->log2_min_pu_size;
2140     int min_pu_width     = s->ps.sps->min_pu_width;
2141     MvField *tab_mvf     = s->ref->tab_mvf;
2142     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
2143     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
2144     int j, k;
2145
2146     if (size_in_pus == 0)
2147         size_in_pus = 1;
2148     for (j = 0; j < size_in_pus; j++)
2149         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2150     if (lc->cu.pred_mode == MODE_INTRA)
2151         for (j = 0; j < size_in_pus; j++)
2152             for (k = 0; k < size_in_pus; k++)
2153                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
2154 }
2155
2156 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2157 {
2158     int cb_size          = 1 << log2_cb_size;
2159     HEVCLocalContext *lc = s->HEVClc;
2160     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
2161     int length           = cb_size >> log2_min_cb_size;
2162     int min_cb_width     = s->ps.sps->min_cb_width;
2163     int x_cb             = x0 >> log2_min_cb_size;
2164     int y_cb             = y0 >> log2_min_cb_size;
2165     int idx              = log2_cb_size - 2;
2166     int qp_block_mask    = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2167     int x, y, ret;
2168
2169     lc->cu.x                = x0;
2170     lc->cu.y                = y0;
2171     lc->cu.pred_mode        = MODE_INTRA;
2172     lc->cu.part_mode        = PART_2Nx2N;
2173     lc->cu.intra_split_flag = 0;
2174
2175     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2176     for (x = 0; x < 4; x++)
2177         lc->pu.intra_pred_mode[x] = 1;
2178     if (s->ps.pps->transquant_bypass_enable_flag) {
2179         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2180         if (lc->cu.cu_transquant_bypass_flag)
2181             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2182     } else
2183         lc->cu.cu_transquant_bypass_flag = 0;
2184
2185     if (s->sh.slice_type != HEVC_SLICE_I) {
2186         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2187
2188         x = y_cb * min_cb_width + x_cb;
2189         for (y = 0; y < length; y++) {
2190             memset(&s->skip_flag[x], skip_flag, length);
2191             x += min_cb_width;
2192         }
2193         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2194     } else {
2195         x = y_cb * min_cb_width + x_cb;
2196         for (y = 0; y < length; y++) {
2197             memset(&s->skip_flag[x], 0, length);
2198             x += min_cb_width;
2199         }
2200     }
2201
2202     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2203         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2204         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2205
2206         if (!s->sh.disable_deblocking_filter_flag)
2207             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2208     } else {
2209         int pcm_flag = 0;
2210
2211         if (s->sh.slice_type != HEVC_SLICE_I)
2212             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2213         if (lc->cu.pred_mode != MODE_INTRA ||
2214             log2_cb_size == s->ps.sps->log2_min_cb_size) {
2215             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2216             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2217                                       lc->cu.pred_mode == MODE_INTRA;
2218         }
2219
2220         if (lc->cu.pred_mode == MODE_INTRA) {
2221             if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2222                 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2223                 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2224                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2225             }
2226             if (pcm_flag) {
2227                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2228                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2229                 if (s->ps.sps->pcm.loop_filter_disable_flag)
2230                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2231
2232                 if (ret < 0)
2233                     return ret;
2234             } else {
2235                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2236             }
2237         } else {
2238             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2239             switch (lc->cu.part_mode) {
2240             case PART_2Nx2N:
2241                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2242                 break;
2243             case PART_2NxN:
2244                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0, idx);
2245                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2246                 break;
2247             case PART_Nx2N:
2248                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2249                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2250                 break;
2251             case PART_2NxnU:
2252                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0, idx);
2253                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2254                 break;
2255             case PART_2NxnD:
2256                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2257                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1, idx);
2258                 break;
2259             case PART_nLx2N:
2260                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0, idx - 2);
2261                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2262                 break;
2263             case PART_nRx2N:
2264                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2265                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1, idx - 2);
2266                 break;
2267             case PART_NxN:
2268                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2269                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2270                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2271                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
2272                 break;
2273             }
2274         }
2275
2276         if (!pcm_flag) {
2277             int rqt_root_cbf = 1;
2278
2279             if (lc->cu.pred_mode != MODE_INTRA &&
2280                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2281                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2282             }
2283             if (rqt_root_cbf) {
2284                 const static int cbf[2] = { 0 };
2285                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2286                                          s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2287                                          s->ps.sps->max_transform_hierarchy_depth_inter;
2288                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2289                                          log2_cb_size,
2290                                          log2_cb_size, 0, 0, cbf, cbf);
2291                 if (ret < 0)
2292                     return ret;
2293             } else {
2294                 if (!s->sh.disable_deblocking_filter_flag)
2295                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2296             }
2297         }
2298     }
2299
2300     if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2301         ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
2302
2303     x = y_cb * min_cb_width + x_cb;
2304     for (y = 0; y < length; y++) {
2305         memset(&s->qp_y_tab[x], lc->qp_y, length);
2306         x += min_cb_width;
2307     }
2308
2309     if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2310        ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2311         lc->qPy_pred = lc->qp_y;
2312     }
2313
2314     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
2315
2316     return 0;
2317 }
2318
2319 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2320                                int log2_cb_size, int cb_depth)
2321 {
2322     HEVCLocalContext *lc = s->HEVClc;
2323     const int cb_size    = 1 << log2_cb_size;
2324     int ret;
2325     int split_cu;
2326
2327     lc->ct_depth = cb_depth;
2328     if (x0 + cb_size <= s->ps.sps->width  &&
2329         y0 + cb_size <= s->ps.sps->height &&
2330         log2_cb_size > s->ps.sps->log2_min_cb_size) {
2331         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2332     } else {
2333         split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
2334     }
2335     if (s->ps.pps->cu_qp_delta_enabled_flag &&
2336         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2337         lc->tu.is_cu_qp_delta_coded = 0;
2338         lc->tu.cu_qp_delta          = 0;
2339     }
2340
2341     if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2342         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_chroma_qp_offset_depth) {
2343         lc->tu.is_cu_chroma_qp_offset_coded = 0;
2344     }
2345
2346     if (split_cu) {
2347         int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2348         const int cb_size_split = cb_size >> 1;
2349         const int x1 = x0 + cb_size_split;
2350         const int y1 = y0 + cb_size_split;
2351
2352         int more_data = 0;
2353
2354         more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2355         if (more_data < 0)
2356             return more_data;
2357
2358         if (more_data && x1 < s->ps.sps->width) {
2359             more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2360             if (more_data < 0)
2361                 return more_data;
2362         }
2363         if (more_data && y1 < s->ps.sps->height) {
2364             more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2365             if (more_data < 0)
2366                 return more_data;
2367         }
2368         if (more_data && x1 < s->ps.sps->width &&
2369             y1 < s->ps.sps->height) {
2370             more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
2371             if (more_data < 0)
2372                 return more_data;
2373         }
2374
2375         if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2376             ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2377             lc->qPy_pred = lc->qp_y;
2378
2379         if (more_data)
2380             return ((x1 + cb_size_split) < s->ps.sps->width ||
2381                     (y1 + cb_size_split) < s->ps.sps->height);
2382         else
2383             return 0;
2384     } else {
2385         ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2386         if (ret < 0)
2387             return ret;
2388         if ((!((x0 + cb_size) %
2389                (1 << (s->ps.sps->log2_ctb_size))) ||
2390              (x0 + cb_size >= s->ps.sps->width)) &&
2391             (!((y0 + cb_size) %
2392                (1 << (s->ps.sps->log2_ctb_size))) ||
2393              (y0 + cb_size >= s->ps.sps->height))) {
2394             int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2395             return !end_of_slice_flag;
2396         } else {
2397             return 1;
2398         }
2399     }
2400
2401     return 0;
2402 }
2403
2404 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2405                                  int ctb_addr_ts)
2406 {
2407     HEVCLocalContext *lc  = s->HEVClc;
2408     int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
2409     int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2410     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2411
2412     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2413
2414     if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2415         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2416             lc->first_qp_group = 1;
2417         lc->end_of_tiles_x = s->ps.sps->width;
2418     } else if (s->ps.pps->tiles_enabled_flag) {
2419         if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2420             int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2421             lc->end_of_tiles_x   = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2422             lc->first_qp_group   = 1;
2423         }
2424     } else {
2425         lc->end_of_tiles_x = s->ps.sps->width;
2426     }
2427
2428     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
2429
2430     lc->boundary_flags = 0;
2431     if (s->ps.pps->tiles_enabled_flag) {
2432         if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2433             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2434         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2435             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2436         if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2437             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2438         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2439             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2440     } else {
2441         if (ctb_addr_in_slice <= 0)
2442             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2443         if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2444             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2445     }
2446
2447     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2448     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2449     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2450     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
2451 }
2452
2453 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2454 {
2455     HEVCContext *s  = avctxt->priv_data;
2456     int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
2457     int more_data   = 1;
2458     int x_ctb       = 0;
2459     int y_ctb       = 0;
2460     int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2461     int ret;
2462
2463     if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2464         av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2465         return AVERROR_INVALIDDATA;
2466     }
2467
2468     if (s->sh.dependent_slice_segment_flag) {
2469         int prev_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2470         if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2471             av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2472             return AVERROR_INVALIDDATA;
2473         }
2474     }
2475
2476     while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2477         int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2478
2479         x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2480         y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2481         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2482
2483         ret = ff_hevc_cabac_init(s, ctb_addr_ts, 0);
2484         if (ret < 0) {
2485             s->tab_slice_address[ctb_addr_rs] = -1;
2486             return ret;
2487         }
2488
2489         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2490
2491         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2492         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2493         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2494
2495         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2496         if (more_data < 0) {
2497             s->tab_slice_address[ctb_addr_rs] = -1;
2498             return more_data;
2499         }
2500
2501
2502         ctb_addr_ts++;
2503         ff_hevc_save_states(s, ctb_addr_ts);
2504         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2505     }
2506
2507     if (x_ctb + ctb_size >= s->ps.sps->width &&
2508         y_ctb + ctb_size >= s->ps.sps->height)
2509         ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2510
2511     return ctb_addr_ts;
2512 }
2513
2514 static int hls_slice_data(HEVCContext *s)
2515 {
2516     int arg[2];
2517     int ret[2];
2518
2519     arg[0] = 0;
2520     arg[1] = 1;
2521
2522     s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
2523     return ret[0];
2524 }
2525 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2526 {
2527     HEVCContext *s1  = avctxt->priv_data, *s;
2528     HEVCLocalContext *lc;
2529     int ctb_size    = 1<< s1->ps.sps->log2_ctb_size;
2530     int more_data   = 1;
2531     int *ctb_row_p    = input_ctb_row;
2532     int ctb_row = ctb_row_p[job];
2533     int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->ps.sps->width + ctb_size - 1) >> s1->ps.sps->log2_ctb_size);
2534     int ctb_addr_ts = s1->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2535     int thread = ctb_row % s1->threads_number;
2536     int ret;
2537
2538     s = s1->sList[self_id];
2539     lc = s->HEVClc;
2540
2541     if(ctb_row) {
2542         ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2543         if (ret < 0)
2544             goto error;
2545         ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2546     }
2547
2548     while(more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2549         int x_ctb = (ctb_addr_rs % s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2550         int y_ctb = (ctb_addr_rs / s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2551
2552         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2553
2554         ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
2555
2556         if (atomic_load(&s1->wpp_err)) {
2557             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2558             return 0;
2559         }
2560
2561         ret = ff_hevc_cabac_init(s, ctb_addr_ts, thread);
2562         if (ret < 0)
2563             goto error;
2564         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2565         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2566
2567         if (more_data < 0) {
2568             ret = more_data;
2569             goto error;
2570         }
2571
2572         ctb_addr_ts++;
2573
2574         ff_hevc_save_states(s, ctb_addr_ts);
2575         ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2576         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2577
2578         if (!more_data && (x_ctb+ctb_size) < s->ps.sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2579             atomic_store(&s1->wpp_err, 1);
2580             ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2581             return 0;
2582         }
2583
2584         if ((x_ctb+ctb_size) >= s->ps.sps->width && (y_ctb+ctb_size) >= s->ps.sps->height ) {
2585             ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2586             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2587             return ctb_addr_ts;
2588         }
2589         ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2590         x_ctb+=ctb_size;
2591
2592         if(x_ctb >= s->ps.sps->width) {
2593             break;
2594         }
2595     }
2596     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2597
2598     return 0;
2599 error:
2600     s->tab_slice_address[ctb_addr_rs] = -1;
2601     atomic_store(&s1->wpp_err, 1);
2602     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2603     return ret;
2604 }
2605
2606 static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal)
2607 {
2608     const uint8_t *data = nal->data;
2609     int length          = nal->size;
2610     HEVCLocalContext *lc = s->HEVClc;
2611     int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2612     int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2613     int64_t offset;
2614     int64_t startheader, cmpt = 0;
2615     int i, j, res = 0;
2616
2617     if (!ret || !arg) {
2618         av_free(ret);
2619         av_free(arg);
2620         return AVERROR(ENOMEM);
2621     }
2622
2623     if (s->sh.slice_ctb_addr_rs + s->sh.num_entry_point_offsets * s->ps.sps->ctb_width >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
2624         av_log(s->avctx, AV_LOG_ERROR, "WPP ctb addresses are wrong (%d %d %d %d)\n",
2625             s->sh.slice_ctb_addr_rs, s->sh.num_entry_point_offsets,
2626             s->ps.sps->ctb_width, s->ps.sps->ctb_height
2627         );
2628         res = AVERROR_INVALIDDATA;
2629         goto error;
2630     }
2631
2632     ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
2633
2634     for (i = 1; i < s->threads_number; i++) {
2635         if (s->sList[i] && s->HEVClcList[i])
2636             continue;
2637         av_freep(&s->sList[i]);
2638         av_freep(&s->HEVClcList[i]);
2639         s->sList[i] = av_malloc(sizeof(HEVCContext));
2640         s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2641         if (!s->sList[i] || !s->HEVClcList[i]) {
2642             res = AVERROR(ENOMEM);
2643             goto error;
2644         }
2645         memcpy(s->sList[i], s, sizeof(HEVCContext));
2646         s->sList[i]->HEVClc = s->HEVClcList[i];
2647     }
2648
2649     offset = (lc->gb.index >> 3);
2650
2651     for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) {
2652         if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2653             startheader--;
2654             cmpt++;
2655         }
2656     }
2657
2658     for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2659         offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2660         for (j = 0, cmpt = 0, startheader = offset
2661              + s->sh.entry_point_offset[i]; j < nal->skipped_bytes; j++) {
2662             if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2663                 startheader--;
2664                 cmpt++;
2665             }
2666         }
2667         s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2668         s->sh.offset[i - 1] = offset;
2669
2670     }
2671     if (s->sh.num_entry_point_offsets != 0) {
2672         offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2673         if (length < offset) {
2674             av_log(s->avctx, AV_LOG_ERROR, "entry_point_offset table is corrupted\n");
2675             res = AVERROR_INVALIDDATA;
2676             goto error;
2677         }
2678         s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2679         s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
2680
2681     }
2682     s->data = data;
2683
2684     for (i = 1; i < s->threads_number; i++) {
2685         s->sList[i]->HEVClc->first_qp_group = 1;
2686         s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2687         memcpy(s->sList[i], s, sizeof(HEVCContext));
2688         s->sList[i]->HEVClc = s->HEVClcList[i];
2689     }
2690
2691     atomic_store(&s->wpp_err, 0);
2692     ff_reset_entries(s->avctx);
2693
2694     for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2695         arg[i] = i;
2696         ret[i] = 0;
2697     }
2698
2699     if (s->ps.pps->entropy_coding_sync_enabled_flag)
2700         s->avctx->execute2(s->avctx, hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2701
2702     for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
2703         res += ret[i];
2704 error:
2705     av_free(ret);
2706     av_free(arg);
2707     return res;
2708 }
2709
2710 static int set_side_data(HEVCContext *s)
2711 {
2712     AVFrame *out = s->ref->frame;
2713
2714     if (s->sei.frame_packing.present &&
2715         s->sei.frame_packing.arrangement_type >= 3 &&
2716         s->sei.frame_packing.arrangement_type <= 5 &&
2717         s->sei.frame_packing.content_interpretation_type > 0 &&
2718         s->sei.frame_packing.content_interpretation_type < 3) {
2719         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2720         if (!stereo)
2721             return AVERROR(ENOMEM);
2722
2723         switch (s->sei.frame_packing.arrangement_type) {
2724         case 3:
2725             if (s->sei.frame_packing.quincunx_subsampling)
2726                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2727             else
2728                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2729             break;
2730         case 4:
2731             stereo->type = AV_STEREO3D_TOPBOTTOM;
2732             break;
2733         case 5:
2734             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2735             break;
2736         }
2737
2738         if (s->sei.frame_packing.content_interpretation_type == 2)
2739             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2740
2741         if (s->sei.frame_packing.arrangement_type == 5) {
2742             if (s->sei.frame_packing.current_frame_is_frame0_flag)
2743                 stereo->view = AV_STEREO3D_VIEW_LEFT;
2744             else
2745                 stereo->view = AV_STEREO3D_VIEW_RIGHT;
2746         }
2747     }
2748
2749     if (s->sei.display_orientation.present &&
2750         (s->sei.display_orientation.anticlockwise_rotation ||
2751          s->sei.display_orientation.hflip || s->sei.display_orientation.vflip)) {
2752         double angle = s->sei.display_orientation.anticlockwise_rotation * 360 / (double) (1 << 16);
2753         AVFrameSideData *rotation = av_frame_new_side_data(out,
2754                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2755                                                            sizeof(int32_t) * 9);
2756         if (!rotation)
2757             return AVERROR(ENOMEM);
2758
2759         av_display_rotation_set((int32_t *)rotation->data, angle);
2760         av_display_matrix_flip((int32_t *)rotation->data,
2761                                s->sei.display_orientation.hflip,
2762                                s->sei.display_orientation.vflip);
2763     }
2764
2765     // Decrement the mastering display flag when IRAP frame has no_rasl_output_flag=1
2766     // so the side data persists for the entire coded video sequence.
2767     if (s->sei.mastering_display.present > 0 &&
2768         IS_IRAP(s) && s->no_rasl_output_flag) {
2769         s->sei.mastering_display.present--;
2770     }
2771     if (s->sei.mastering_display.present) {
2772         // HEVC uses a g,b,r ordering, which we convert to a more natural r,g,b
2773         const int mapping[3] = {2, 0, 1};
2774         const int chroma_den = 50000;
2775         const int luma_den = 10000;
2776         int i;
2777         AVMasteringDisplayMetadata *metadata =
2778             av_mastering_display_metadata_create_side_data(out);
2779         if (!metadata)
2780             return AVERROR(ENOMEM);
2781
2782         for (i = 0; i < 3; i++) {
2783             const int j = mapping[i];
2784             metadata->display_primaries[i][0].num = s->sei.mastering_display.display_primaries[j][0];
2785             metadata->display_primaries[i][0].den = chroma_den;
2786             metadata->display_primaries[i][1].num = s->sei.mastering_display.display_primaries[j][1];
2787             metadata->display_primaries[i][1].den = chroma_den;
2788         }
2789         metadata->white_point[0].num = s->sei.mastering_display.white_point[0];
2790         metadata->white_point[0].den = chroma_den;
2791         metadata->white_point[1].num = s->sei.mastering_display.white_point[1];
2792         metadata->white_point[1].den = chroma_den;
2793
2794         metadata->max_luminance.num = s->sei.mastering_display.max_luminance;
2795         metadata->max_luminance.den = luma_den;
2796         metadata->min_luminance.num = s->sei.mastering_display.min_luminance;
2797         metadata->min_luminance.den = luma_den;
2798         metadata->has_luminance = 1;
2799         metadata->has_primaries = 1;
2800
2801         av_log(s->avctx, AV_LOG_DEBUG, "Mastering Display Metadata:\n");
2802         av_log(s->avctx, AV_LOG_DEBUG,
2803                "r(%5.4f,%5.4f) g(%5.4f,%5.4f) b(%5.4f %5.4f) wp(%5.4f, %5.4f)\n",
2804                av_q2d(metadata->display_primaries[0][0]),
2805                av_q2d(metadata->display_primaries[0][1]),
2806                av_q2d(metadata->display_primaries[1][0]),
2807                av_q2d(metadata->display_primaries[1][1]),
2808                av_q2d(metadata->display_primaries[2][0]),
2809                av_q2d(metadata->display_primaries[2][1]),
2810                av_q2d(metadata->white_point[0]), av_q2d(metadata->white_point[1]));
2811         av_log(s->avctx, AV_LOG_DEBUG,
2812                "min_luminance=%f, max_luminance=%f\n",
2813                av_q2d(metadata->min_luminance), av_q2d(metadata->max_luminance));
2814     }
2815     // Decrement the mastering display flag when IRAP frame has no_rasl_output_flag=1
2816     // so the side data persists for the entire coded video sequence.
2817     if (s->sei.content_light.present > 0 &&
2818         IS_IRAP(s) && s->no_rasl_output_flag) {
2819         s->sei.content_light.present--;
2820     }
2821     if (s->sei.content_light.present) {
2822         AVContentLightMetadata *metadata =
2823             av_content_light_metadata_create_side_data(out);
2824         if (!metadata)
2825             return AVERROR(ENOMEM);
2826         metadata->MaxCLL  = s->sei.content_light.max_content_light_level;
2827         metadata->MaxFALL = s->sei.content_light.max_pic_average_light_level;
2828
2829         av_log(s->avctx, AV_LOG_DEBUG, "Content Light Level Metadata:\n");
2830         av_log(s->avctx, AV_LOG_DEBUG, "MaxCLL=%d, MaxFALL=%d\n",
2831                metadata->MaxCLL, metadata->MaxFALL);
2832     }
2833
2834     if (s->sei.a53_caption.buf_ref) {
2835         HEVCSEIA53Caption *a53 = &s->sei.a53_caption;
2836
2837         AVFrameSideData *sd = av_frame_new_side_data_from_buf(out, AV_FRAME_DATA_A53_CC, a53->buf_ref);
2838         if (!sd)
2839             av_buffer_unref(&a53->buf_ref);
2840         a53->buf_ref = NULL;
2841     }
2842
2843     for (int i = 0; i < s->sei.unregistered.nb_buf_ref; i++) {
2844         HEVCSEIUnregistered *unreg = &s->sei.unregistered;
2845
2846         if (unreg->buf_ref[i]) {
2847             AVFrameSideData *sd = av_frame_new_side_data_from_buf(out,
2848                     AV_FRAME_DATA_SEI_UNREGISTERED,
2849                     unreg->buf_ref[i]);
2850             if (!sd)
2851                 av_buffer_unref(&unreg->buf_ref[i]);
2852             unreg->buf_ref[i] = NULL;
2853         }
2854     }
2855     s->sei.unregistered.nb_buf_ref = 0;
2856
2857     if (s->sei.timecode.present) {
2858         uint32_t *tc_sd;
2859         char tcbuf[AV_TIMECODE_STR_SIZE];
2860         AVFrameSideData *tcside = av_frame_new_side_data(out, AV_FRAME_DATA_S12M_TIMECODE,
2861                                                          sizeof(uint32_t) * 4);
2862         if (!tcside)
2863             return AVERROR(ENOMEM);
2864
2865         tc_sd = (uint32_t*)tcside->data;
2866         tc_sd[0] = s->sei.timecode.num_clock_ts;
2867
2868         for (int i = 0; i < tc_sd[0]; i++) {
2869             int drop = s->sei.timecode.cnt_dropped_flag[i];
2870             int   hh = s->sei.timecode.hours_value[i];
2871             int   mm = s->sei.timecode.minutes_value[i];
2872             int   ss = s->sei.timecode.seconds_value[i];
2873             int   ff = s->sei.timecode.n_frames[i];
2874
2875             tc_sd[i + 1] = av_timecode_get_smpte(s->avctx->framerate, drop, hh, mm, ss, ff);
2876             av_timecode_make_smpte_tc_string2(tcbuf, s->avctx->framerate, tc_sd[i + 1], 0, 0);
2877             av_dict_set(&out->metadata, "timecode", tcbuf, 0);
2878         }
2879
2880         s->sei.timecode.num_clock_ts = 0;
2881     }
2882
2883     if (s->sei.dynamic_hdr_plus.info) {
2884         AVBufferRef *info_ref = av_buffer_ref(s->sei.dynamic_hdr_plus.info);
2885         if (!info_ref)
2886             return AVERROR(ENOMEM);
2887
2888         if (!av_frame_new_side_data_from_buf(out, AV_FRAME_DATA_DYNAMIC_HDR_PLUS, info_ref)) {
2889             av_buffer_unref(&info_ref);
2890             return AVERROR(ENOMEM);
2891         }
2892     }
2893
2894     return 0;
2895 }
2896
2897 static int hevc_frame_start(HEVCContext *s)
2898 {
2899     HEVCLocalContext *lc = s->HEVClc;
2900     int pic_size_in_ctb  = ((s->ps.sps->width  >> s->ps.sps->log2_min_cb_size) + 1) *
2901                            ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
2902     int ret;
2903
2904     memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2905     memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
2906     memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2907     memset(s->is_pcm,        0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
2908     memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2909
2910     s->is_decoded        = 0;
2911     s->first_nal_type    = s->nal_unit_type;
2912
2913     s->no_rasl_output_flag = IS_IDR(s) || IS_BLA(s) || (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos);
2914
2915     if (s->ps.pps->tiles_enabled_flag)
2916         lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2917
2918     ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2919     if (ret < 0)
2920         goto fail;
2921
2922     ret = ff_hevc_frame_rps(s);
2923     if (ret < 0) {
2924         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2925         goto fail;
2926     }
2927
2928     s->ref->frame->key_frame = IS_IRAP(s);
2929
2930     ret = set_side_data(s);
2931     if (ret < 0)
2932         goto fail;
2933
2934     s->frame->pict_type = 3 - s->sh.slice_type;
2935
2936     if (!IS_IRAP(s))
2937         ff_hevc_bump_frame(s);
2938
2939     av_frame_unref(s->output_frame);
2940     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2941     if (ret < 0)
2942         goto fail;
2943
2944     if (!s->avctx->hwaccel)
2945         ff_thread_finish_setup(s->avctx);
2946
2947     return 0;
2948
2949 fail:
2950     if (s->ref)
2951         ff_hevc_unref_frame(s, s->ref, ~0);
2952     s->ref = NULL;
2953     return ret;
2954 }
2955
2956 static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
2957 {
2958     HEVCLocalContext *lc = s->HEVClc;
2959     GetBitContext *gb    = &lc->gb;
2960     int ctb_addr_ts, ret;
2961
2962     *gb              = nal->gb;
2963     s->nal_unit_type = nal->type;
2964     s->temporal_id   = nal->temporal_id;
2965
2966     switch (s->nal_unit_type) {
2967     case HEVC_NAL_VPS:
2968         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2969             ret = s->avctx->hwaccel->decode_params(s->avctx,
2970                                                    nal->type,
2971                                                    nal->raw_data,
2972                                                    nal->raw_size);
2973             if (ret < 0)
2974                 goto fail;
2975         }
2976         ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2977         if (ret < 0)
2978             goto fail;
2979         break;
2980     case HEVC_NAL_SPS:
2981         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2982             ret = s->avctx->hwaccel->decode_params(s->avctx,
2983                                                    nal->type,
2984                                                    nal->raw_data,
2985                                                    nal->raw_size);
2986             if (ret < 0)
2987                 goto fail;
2988         }
2989         ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2990                                      s->apply_defdispwin);
2991         if (ret < 0)
2992             goto fail;
2993         break;
2994     case HEVC_NAL_PPS:
2995         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2996             ret = s->avctx->hwaccel->decode_params(s->avctx,
2997                                                    nal->type,
2998                                                    nal->raw_data,
2999                                                    nal->raw_size);
3000             if (ret < 0)
3001                 goto fail;
3002         }
3003         ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
3004         if (ret < 0)
3005             goto fail;
3006         break;
3007     case HEVC_NAL_SEI_PREFIX:
3008     case HEVC_NAL_SEI_SUFFIX:
3009         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
3010             ret = s->avctx->hwaccel->decode_params(s->avctx,
3011                                                    nal->type,
3012                                                    nal->raw_data,
3013                                                    nal->raw_size);
3014             if (ret < 0)
3015                 goto fail;
3016         }
3017         ret = ff_hevc_decode_nal_sei(gb, s->avctx, &s->sei, &s->ps, s->nal_unit_type);
3018         if (ret < 0)
3019             goto fail;
3020         break;
3021     case HEVC_NAL_TRAIL_R:
3022     case HEVC_NAL_TRAIL_N:
3023     case HEVC_NAL_TSA_N:
3024     case HEVC_NAL_TSA_R:
3025     case HEVC_NAL_STSA_N:
3026     case HEVC_NAL_STSA_R:
3027     case HEVC_NAL_BLA_W_LP:
3028     case HEVC_NAL_BLA_W_RADL:
3029     case HEVC_NAL_BLA_N_LP:
3030     case HEVC_NAL_IDR_W_RADL:
3031     case HEVC_NAL_IDR_N_LP:
3032     case HEVC_NAL_CRA_NUT:
3033     case HEVC_NAL_RADL_N:
3034     case HEVC_NAL_RADL_R:
3035     case HEVC_NAL_RASL_N:
3036     case HEVC_NAL_RASL_R:
3037         ret = hls_slice_header(s);
3038         if (ret < 0)
3039             return ret;
3040         if (ret == 1) {
3041             ret = AVERROR_INVALIDDATA;
3042             goto fail;
3043         }
3044
3045
3046         if (
3047             (s->avctx->skip_frame >= AVDISCARD_BIDIR && s->sh.slice_type == HEVC_SLICE_B) ||
3048             (s->avctx->skip_frame >= AVDISCARD_NONINTRA && s->sh.slice_type != HEVC_SLICE_I) ||
3049             (s->avctx->skip_frame >= AVDISCARD_NONKEY && !IS_IRAP(s))) {
3050             break;
3051         }
3052
3053         if (s->sh.first_slice_in_pic_flag) {
3054             if (s->max_ra == INT_MAX) {
3055                 if (s->nal_unit_type == HEVC_NAL_CRA_NUT || IS_BLA(s)) {
3056                     s->max_ra = s->poc;
3057                 } else {
3058                     if (IS_IDR(s))
3059                         s->max_ra = INT_MIN;
3060                 }
3061             }
3062
3063             if ((s->nal_unit_type == HEVC_NAL_RASL_R || s->nal_unit_type == HEVC_NAL_RASL_N) &&
3064                 s->poc <= s->max_ra) {
3065                 s->is_decoded = 0;
3066                 break;
3067             } else {
3068                 if (s->nal_unit_type == HEVC_NAL_RASL_R && s->poc > s->max_ra)
3069                     s->max_ra = INT_MIN;
3070             }
3071
3072             s->overlap ++;
3073             ret = hevc_frame_start(s);
3074             if (ret < 0)
3075                 return ret;
3076         } else if (!s->ref) {
3077             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
3078             goto fail;
3079         }
3080
3081         if (s->nal_unit_type != s->first_nal_type) {
3082             av_log(s->avctx, AV_LOG_ERROR,
3083                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
3084                    s->first_nal_type, s->nal_unit_type);
3085             return AVERROR_INVALIDDATA;
3086         }
3087
3088         if (!s->sh.dependent_slice_segment_flag &&
3089             s->sh.slice_type != HEVC_SLICE_I) {
3090             ret = ff_hevc_slice_rpl(s);
3091             if (ret < 0) {
3092                 av_log(s->avctx, AV_LOG_WARNING,
3093                        "Error constructing the reference lists for the current slice.\n");
3094                 goto fail;
3095             }
3096         }
3097
3098         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
3099             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
3100             if (ret < 0)
3101                 goto fail;
3102         }
3103
3104         if (s->avctx->hwaccel) {
3105             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
3106             if (ret < 0)
3107                 goto fail;
3108         } else {
3109             if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
3110                 ctb_addr_ts = hls_slice_data_wpp(s, nal);
3111             else
3112                 ctb_addr_ts = hls_slice_data(s);
3113             if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
3114                 s->is_decoded = 1;
3115             }
3116
3117             if (ctb_addr_ts < 0) {
3118                 ret = ctb_addr_ts;
3119                 goto fail;
3120             }
3121         }
3122         break;
3123     case HEVC_NAL_EOS_NUT:
3124     case HEVC_NAL_EOB_NUT:
3125         s->seq_decode = (s->seq_decode + 1) & 0xff;
3126         s->max_ra     = INT_MAX;
3127         break;
3128     case HEVC_NAL_AUD:
3129     case HEVC_NAL_FD_NUT:
3130         break;
3131     default:
3132         av_log(s->avctx, AV_LOG_INFO,
3133                "Skipping NAL unit %d\n", s->nal_unit_type);
3134     }
3135
3136     return 0;
3137 fail:
3138     if (s->avctx->err_recognition & AV_EF_EXPLODE)
3139         return ret;
3140     return 0;
3141 }
3142
3143 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
3144 {
3145     int i, ret = 0;
3146     int eos_at_start = 1;
3147
3148     s->ref = NULL;
3149     s->last_eos = s->eos;
3150     s->eos = 0;
3151     s->overlap = 0;
3152
3153     /* split the input packet into NAL units, so we know the upper bound on the
3154      * number of slices in the frame */
3155     ret = ff_h2645_packet_split(&s->pkt, buf, length, s->avctx, s->is_nalff,
3156                                 s->nal_length_size, s->avctx->codec_id, 1, 0);
3157     if (ret < 0) {
3158         av_log(s->avctx, AV_LOG_ERROR,
3159                "Error splitting the input into NAL units.\n");
3160         return ret;
3161     }
3162
3163     for (i = 0; i < s->pkt.nb_nals; i++) {
3164         if (s->pkt.nals[i].type == HEVC_NAL_EOB_NUT ||
3165             s->pkt.nals[i].type == HEVC_NAL_EOS_NUT) {
3166             if (eos_at_start) {
3167                 s->last_eos = 1;
3168             } else {
3169                 s->eos = 1;
3170             }
3171         } else {
3172             eos_at_start = 0;
3173         }
3174     }
3175
3176     /* decode the NAL units */
3177     for (i = 0; i < s->pkt.nb_nals; i++) {
3178         H2645NAL *nal = &s->pkt.nals[i];
3179
3180         if (s->avctx->skip_frame >= AVDISCARD_ALL ||
3181             (s->avctx->skip_frame >= AVDISCARD_NONREF
3182             && ff_hevc_nal_is_nonref(nal->type)) || nal->nuh_layer_id > 0)
3183             continue;
3184
3185         ret = decode_nal_unit(s, nal);
3186         if (ret >= 0 && s->overlap > 2)
3187             ret = AVERROR_INVALIDDATA;
3188         if (ret < 0) {
3189             av_log(s->avctx, AV_LOG_WARNING,
3190                    "Error parsing NAL unit #%d.\n", i);
3191             goto fail;
3192         }
3193     }
3194
3195 fail:
3196     if (s->ref && s->threads_type == FF_THREAD_FRAME)
3197         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
3198
3199     return ret;
3200 }
3201
3202 static void print_md5(void *log_ctx, int level, uint8_t md5[16])
3203 {
3204     int i;
3205     for (i = 0; i < 16; i++)
3206         av_log(log_ctx, level, "%02"PRIx8, md5[i]);
3207 }
3208
3209 static int verify_md5(HEVCContext *s, AVFrame *frame)
3210 {
3211     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
3212     int pixel_shift;
3213     int i, j;
3214
3215     if (!desc)
3216         return AVERROR(EINVAL);
3217
3218     pixel_shift = desc->comp[0].depth > 8;
3219
3220     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
3221            s->poc);
3222
3223     /* the checksums are LE, so we have to byteswap for >8bpp formats
3224      * on BE arches */
3225 #if HAVE_BIGENDIAN
3226     if (pixel_shift && !s->checksum_buf) {
3227         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
3228                        FFMAX3(frame->linesize[0], frame->linesize[1],
3229                               frame->linesize[2]));
3230         if (!s->checksum_buf)
3231             return AVERROR(ENOMEM);
3232     }
3233 #endif
3234
3235     for (i = 0; frame->data[i]; i++) {
3236         int width  = s->avctx->coded_width;
3237         int height = s->avctx->coded_height;
3238         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
3239         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
3240         uint8_t md5[16];
3241
3242         av_md5_init(s->md5_ctx);
3243         for (j = 0; j < h; j++) {
3244             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
3245 #if HAVE_BIGENDIAN
3246             if (pixel_shift) {
3247                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
3248                                     (const uint16_t *) src, w);
3249                 src = s->checksum_buf;
3250             }
3251 #endif
3252             av_md5_update(s->md5_ctx, src, w << pixel_shift);
3253         }
3254         av_md5_final(s->md5_ctx, md5);
3255
3256         if (!memcmp(md5, s->sei.picture_hash.md5[i], 16)) {
3257             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
3258             print_md5(s->avctx, AV_LOG_DEBUG, md5);
3259             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
3260         } else {
3261             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
3262             print_md5(s->avctx, AV_LOG_ERROR, md5);
3263             av_log   (s->avctx, AV_LOG_ERROR, " != ");
3264             print_md5(s->avctx, AV_LOG_ERROR, s->sei.picture_hash.md5[i]);
3265             av_log   (s->avctx, AV_LOG_ERROR, "\n");
3266             return AVERROR_INVALIDDATA;
3267         }
3268     }
3269
3270     av_log(s->avctx, AV_LOG_DEBUG, "\n");
3271
3272     return 0;
3273 }
3274
3275 static int hevc_decode_extradata(HEVCContext *s, uint8_t *buf, int length, int first)
3276 {
3277     int ret, i;
3278
3279     ret = ff_hevc_decode_extradata(buf, length, &s->ps, &s->sei, &s->is_nalff,
3280                                    &s->nal_length_size, s->avctx->err_recognition,
3281                                    s->apply_defdispwin, s->avctx);
3282     if (ret < 0)
3283         return ret;
3284
3285     /* export stream parameters from the first SPS */
3286     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3287         if (first && s->ps.sps_list[i]) {
3288             const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
3289             export_stream_params(s, sps);
3290             break;
3291         }
3292     }
3293
3294     /* export stream parameters from SEI */
3295     ret = export_stream_params_from_sei(s);
3296     if (ret < 0)
3297         return ret;
3298
3299     return 0;
3300 }
3301
3302 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
3303                              AVPacket *avpkt)
3304 {
3305     int ret;
3306     buffer_size_t new_extradata_size;
3307     uint8_t *new_extradata;
3308     HEVCContext *s = avctx->priv_data;
3309
3310     if (!avpkt->size) {
3311         ret = ff_hevc_output_frame(s, data, 1);
3312         if (ret < 0)
3313             return ret;
3314
3315         *got_output = ret;
3316         return 0;
3317     }
3318
3319     new_extradata = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA,
3320                                             &new_extradata_size);
3321     if (new_extradata && new_extradata_size > 0) {
3322         ret = hevc_decode_extradata(s, new_extradata, new_extradata_size, 0);
3323         if (ret < 0)
3324             return ret;
3325     }
3326
3327     s->ref = NULL;
3328     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
3329     if (ret < 0)
3330         return ret;
3331
3332     if (avctx->hwaccel) {
3333         if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) {
3334             av_log(avctx, AV_LOG_ERROR,
3335                    "hardware accelerator failed to decode picture\n");
3336             ff_hevc_unref_frame(s, s->ref, ~0);
3337             return ret;
3338         }
3339     } else {
3340         /* verify the SEI checksum */
3341         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
3342             s->sei.picture_hash.is_md5) {
3343             ret = verify_md5(s, s->ref->frame);
3344             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
3345                 ff_hevc_unref_frame(s, s->ref, ~0);
3346                 return ret;
3347             }
3348         }
3349     }
3350     s->sei.picture_hash.is_md5 = 0;
3351
3352     if (s->is_decoded) {
3353         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
3354         s->is_decoded = 0;
3355     }
3356
3357     if (s->output_frame->buf[0]) {
3358         av_frame_move_ref(data, s->output_frame);
3359         *got_output = 1;
3360     }
3361
3362     return avpkt->size;
3363 }
3364
3365 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
3366 {
3367     int ret;
3368
3369     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
3370     if (ret < 0)
3371         return ret;
3372
3373     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
3374     if (!dst->tab_mvf_buf)
3375         goto fail;
3376     dst->tab_mvf = src->tab_mvf;
3377
3378     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
3379     if (!dst->rpl_tab_buf)
3380         goto fail;
3381     dst->rpl_tab = src->rpl_tab;
3382
3383     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
3384     if (!dst->rpl_buf)
3385         goto fail;
3386
3387     dst->poc        = src->poc;
3388     dst->ctb_count  = src->ctb_count;
3389     dst->flags      = src->flags;
3390     dst->sequence   = src->sequence;
3391
3392     if (src->hwaccel_picture_private) {
3393         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
3394         if (!dst->hwaccel_priv_buf)
3395             goto fail;
3396         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
3397     }
3398
3399     return 0;
3400 fail:
3401     ff_hevc_unref_frame(s, dst, ~0);
3402     return AVERROR(ENOMEM);
3403 }
3404
3405 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3406 {
3407     HEVCContext       *s = avctx->priv_data;
3408     int i;
3409
3410     pic_arrays_free(s);
3411
3412     av_freep(&s->md5_ctx);
3413
3414     av_freep(&s->cabac_state);
3415
3416     for (i = 0; i < 3; i++) {
3417         av_freep(&s->sao_pixel_buffer_h[i]);
3418         av_freep(&s->sao_pixel_buffer_v[i]);
3419     }
3420     av_frame_free(&s->output_frame);
3421
3422     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3423         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3424         av_frame_free(&s->DPB[i].frame);
3425     }
3426
3427     ff_hevc_ps_uninit(&s->ps);
3428
3429     av_freep(&s->sh.entry_point_offset);
3430     av_freep(&s->sh.offset);
3431     av_freep(&s->sh.size);
3432
3433     if (s->HEVClcList && s->sList) {
3434         for (i = 1; i < s->threads_number; i++) {
3435             av_freep(&s->HEVClcList[i]);
3436             av_freep(&s->sList[i]);
3437         }
3438     }
3439     av_freep(&s->HEVClc);
3440     av_freep(&s->HEVClcList);
3441     av_freep(&s->sList);
3442
3443     ff_h2645_packet_uninit(&s->pkt);
3444
3445     ff_hevc_reset_sei(&s->sei);
3446
3447     return 0;
3448 }
3449
3450 static av_cold int hevc_init_context(AVCodecContext *avctx)
3451 {
3452     HEVCContext *s = avctx->priv_data;
3453     int i;
3454
3455     s->avctx = avctx;
3456
3457     s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3458     s->HEVClcList = av_mallocz(sizeof(HEVCLocalContext*) * s->threads_number);
3459     s->sList = av_mallocz(sizeof(HEVCContext*) * s->threads_number);
3460     if (!s->HEVClc || !s->HEVClcList || !s->sList)
3461         goto fail;
3462     s->HEVClcList[0] = s->HEVClc;
3463     s->sList[0] = s;
3464
3465     s->cabac_state = av_malloc(HEVC_CONTEXTS);
3466     if (!s->cabac_state)
3467         goto fail;
3468
3469     s->output_frame = av_frame_alloc();
3470     if (!s->output_frame)
3471         goto fail;
3472
3473     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3474         s->DPB[i].frame = av_frame_alloc();
3475         if (!s->DPB[i].frame)
3476             goto fail;
3477         s->DPB[i].tf.f = s->DPB[i].frame;
3478     }
3479
3480     s->max_ra = INT_MAX;
3481
3482     s->md5_ctx = av_md5_alloc();
3483     if (!s->md5_ctx)
3484         goto fail;
3485
3486     ff_bswapdsp_init(&s->bdsp);
3487
3488     s->context_initialized = 1;
3489     s->eos = 0;
3490
3491     ff_hevc_reset_sei(&s->sei);
3492
3493     return 0;
3494
3495 fail:
3496     hevc_decode_free(avctx);
3497     return AVERROR(ENOMEM);
3498 }
3499
3500 #if HAVE_THREADS
3501 static int hevc_update_thread_context(AVCodecContext *dst,
3502                                       const AVCodecContext *src)
3503 {
3504     HEVCContext *s  = dst->priv_data;
3505     HEVCContext *s0 = src->priv_data;
3506     int i, ret;
3507
3508     if (!s->context_initialized) {
3509         ret = hevc_init_context(dst);
3510         if (ret < 0)
3511             return ret;
3512     }
3513
3514     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3515         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3516         if (s0->DPB[i].frame->buf[0]) {
3517             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3518             if (ret < 0)
3519                 return ret;
3520         }
3521     }
3522
3523     if (s->ps.sps != s0->ps.sps)
3524         s->ps.sps = NULL;
3525     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
3526         ret = av_buffer_replace(&s->ps.vps_list[i], s0->ps.vps_list[i]);
3527         if (ret < 0)
3528             return ret;
3529     }
3530
3531     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3532         ret = av_buffer_replace(&s->ps.sps_list[i], s0->ps.sps_list[i]);
3533         if (ret < 0)
3534             return ret;
3535     }
3536
3537     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
3538         ret = av_buffer_replace(&s->ps.pps_list[i], s0->ps.pps_list[i]);
3539         if (ret < 0)
3540             return ret;
3541     }
3542
3543     if (s->ps.sps != s0->ps.sps)
3544         if ((ret = set_sps(s, s0->ps.sps, src->pix_fmt)) < 0)
3545             return ret;
3546
3547     s->seq_decode = s0->seq_decode;
3548     s->seq_output = s0->seq_output;
3549     s->pocTid0    = s0->pocTid0;
3550     s->max_ra     = s0->max_ra;
3551     s->eos        = s0->eos;
3552     s->no_rasl_output_flag = s0->no_rasl_output_flag;
3553
3554     s->is_nalff        = s0->is_nalff;
3555     s->nal_length_size = s0->nal_length_size;
3556
3557     s->threads_number      = s0->threads_number;
3558     s->threads_type        = s0->threads_type;
3559
3560     if (s0->eos) {
3561         s->seq_decode = (s->seq_decode + 1) & 0xff;
3562         s->max_ra = INT_MAX;
3563     }
3564
3565     ret = av_buffer_replace(&s->sei.a53_caption.buf_ref, s0->sei.a53_caption.buf_ref);
3566     if (ret < 0)
3567         return ret;
3568
3569     for (i = 0; i < s->sei.unregistered.nb_buf_ref; i++)
3570         av_buffer_unref(&s->sei.unregistered.buf_ref[i]);
3571     s->sei.unregistered.nb_buf_ref = 0;
3572
3573     if (s0->sei.unregistered.nb_buf_ref) {
3574         ret = av_reallocp_array(&s->sei.unregistered.buf_ref,
3575                                 s0->sei.unregistered.nb_buf_ref,
3576                                 sizeof(*s->sei.unregistered.buf_ref));
3577         if (ret < 0)
3578             return ret;
3579
3580         for (i = 0; i < s0->sei.unregistered.nb_buf_ref; i++) {
3581             s->sei.unregistered.buf_ref[i] = av_buffer_ref(s0->sei.unregistered.buf_ref[i]);
3582             if (!s->sei.unregistered.buf_ref[i])
3583                 return AVERROR(ENOMEM);
3584             s->sei.unregistered.nb_buf_ref++;
3585         }
3586     }
3587
3588     ret = av_buffer_replace(&s->sei.dynamic_hdr_plus.info, s0->sei.dynamic_hdr_plus.info);
3589     if (ret < 0)
3590         return ret;
3591
3592     s->sei.frame_packing        = s0->sei.frame_packing;
3593     s->sei.display_orientation  = s0->sei.display_orientation;
3594     s->sei.mastering_display    = s0->sei.mastering_display;
3595     s->sei.content_light        = s0->sei.content_light;
3596     s->sei.alternative_transfer = s0->sei.alternative_transfer;
3597
3598     ret = export_stream_params_from_sei(s);
3599     if (ret < 0)
3600         return ret;
3601
3602     return 0;
3603 }
3604 #endif
3605
3606 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3607 {
3608     HEVCContext *s = avctx->priv_data;
3609     int ret;
3610
3611     if(avctx->active_thread_type & FF_THREAD_SLICE)
3612         s->threads_number = avctx->thread_count;
3613     else
3614         s->threads_number = 1;
3615
3616     if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3617         s->threads_type = FF_THREAD_FRAME;
3618     else
3619         s->threads_type = FF_THREAD_SLICE;
3620
3621     ret = hevc_init_context(avctx);
3622     if (ret < 0)
3623         return ret;
3624
3625     s->enable_parallel_tiles = 0;
3626     s->sei.picture_timing.picture_struct = 0;
3627     s->eos = 1;
3628
3629     atomic_init(&s->wpp_err, 0);
3630
3631     if (!avctx->internal->is_copy) {
3632         if (avctx->extradata_size > 0 && avctx->extradata) {
3633             ret = hevc_decode_extradata(s, avctx->extradata, avctx->extradata_size, 1);
3634             if (ret < 0) {
3635                 return ret;
3636             }
3637         }
3638     }
3639
3640     return 0;
3641 }
3642
3643 static void hevc_decode_flush(AVCodecContext *avctx)
3644 {
3645     HEVCContext *s = avctx->priv_data;
3646     ff_hevc_flush_dpb(s);
3647     ff_hevc_reset_sei(&s->sei);
3648     s->max_ra = INT_MAX;
3649     s->eos = 1;
3650 }
3651
3652 #define OFFSET(x) offsetof(HEVCContext, x)
3653 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3654
3655 static const AVOption options[] = {
3656     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3657         AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3658     { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3659         AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3660     { NULL },
3661 };
3662
3663 static const AVClass hevc_decoder_class = {
3664     .class_name = "HEVC decoder",
3665     .item_name  = av_default_item_name,
3666     .option     = options,
3667     .version    = LIBAVUTIL_VERSION_INT,
3668 };
3669
3670 AVCodec ff_hevc_decoder = {
3671     .name                  = "hevc",
3672     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3673     .type                  = AVMEDIA_TYPE_VIDEO,
3674     .id                    = AV_CODEC_ID_HEVC,
3675     .priv_data_size        = sizeof(HEVCContext),
3676     .priv_class            = &hevc_decoder_class,
3677     .init                  = hevc_decode_init,
3678     .close                 = hevc_decode_free,
3679     .decode                = hevc_decode_frame,
3680     .flush                 = hevc_decode_flush,
3681     .update_thread_context = ONLY_IF_THREADS_ENABLED(hevc_update_thread_context),
3682     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
3683                              AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
3684     .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_EXPORTS_CROPPING |
3685                              FF_CODEC_CAP_ALLOCATE_PROGRESS | FF_CODEC_CAP_INIT_CLEANUP,
3686     .profiles              = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
3687     .hw_configs            = (const AVCodecHWConfigInternal *const []) {
3688 #if CONFIG_HEVC_DXVA2_HWACCEL
3689                                HWACCEL_DXVA2(hevc),
3690 #endif
3691 #if CONFIG_HEVC_D3D11VA_HWACCEL
3692                                HWACCEL_D3D11VA(hevc),
3693 #endif
3694 #if CONFIG_HEVC_D3D11VA2_HWACCEL
3695                                HWACCEL_D3D11VA2(hevc),
3696 #endif
3697 #if CONFIG_HEVC_NVDEC_HWACCEL
3698                                HWACCEL_NVDEC(hevc),
3699 #endif
3700 #if CONFIG_HEVC_VAAPI_HWACCEL
3701                                HWACCEL_VAAPI(hevc),
3702 #endif
3703 #if CONFIG_HEVC_VDPAU_HWACCEL
3704                                HWACCEL_VDPAU(hevc),
3705 #endif
3706 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
3707                                HWACCEL_VIDEOTOOLBOX(hevc),
3708 #endif
3709                                NULL
3710                            },
3711 };