git.sesse.net Git - ffmpeg/blob - libavcodec/hevcdec.c

   1 /*
   2  * HEVC video Decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of FFmpeg.
  10  *
  11  * FFmpeg is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * FFmpeg is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with FFmpeg; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/attributes.h"
  27 #include "libavutil/common.h"
  28 #include "libavutil/display.h"
  29 #include "libavutil/internal.h"
  30 #include "libavutil/mastering_display_metadata.h"
  31 #include "libavutil/md5.h"
  32 #include "libavutil/opt.h"
  33 #include "libavutil/pixdesc.h"
  34 #include "libavutil/stereo3d.h"
  35 #include "libavutil/timecode.h"
  36
  37 #include "bswapdsp.h"
  38 #include "bytestream.h"
  39 #include "cabac_functions.h"
  40 #include "golomb.h"
  41 #include "hevc.h"
  42 #include "hevc_data.h"
  43 #include "hevc_parse.h"
  44 #include "hevcdec.h"
  45 #include "hwconfig.h"
  46 #include "profiles.h"
  47
  48 const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
  49
  50 /**
  51  * NOTE: Each function hls_foo correspond to the function foo in the
  52  * specification (HLS stands for High Level Syntax).
  53  */
  54
  55 /**
  56  * Section 5.7
  57  */
  58
  59 /* free everything allocated  by pic_arrays_init() */
  60 static void pic_arrays_free(HEVCContext *s)
  61 {
  62     av_freep(&s->sao);
  63     av_freep(&s->deblock);
  64
  65     av_freep(&s->skip_flag);
  66     av_freep(&s->tab_ct_depth);
  67
  68     av_freep(&s->tab_ipm);
  69     av_freep(&s->cbf_luma);
  70     av_freep(&s->is_pcm);
  71
  72     av_freep(&s->qp_y_tab);
  73     av_freep(&s->tab_slice_address);
  74     av_freep(&s->filter_slice_edges);
  75
  76     av_freep(&s->horizontal_bs);
  77     av_freep(&s->vertical_bs);
  78
  79     av_freep(&s->sh.entry_point_offset);
  80     av_freep(&s->sh.size);
  81     av_freep(&s->sh.offset);
  82
  83     av_buffer_pool_uninit(&s->tab_mvf_pool);
  84     av_buffer_pool_uninit(&s->rpl_tab_pool);
  85 }
  86
  87 /* allocate arrays that depend on frame dimensions */
  88 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
  89 {
  90     int log2_min_cb_size = sps->log2_min_cb_size;
  91     int width            = sps->width;
  92     int height           = sps->height;
  93     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
  94                            ((height >> log2_min_cb_size) + 1);
  95     int ctb_count        = sps->ctb_width * sps->ctb_height;
  96     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
  97
  98     s->bs_width  = (width  >> 2) + 1;
  99     s->bs_height = (height >> 2) + 1;
 100
 101     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
 102     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
 103     if (!s->sao || !s->deblock)
 104         goto fail;
 105
 106     s->skip_flag    = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
 107     s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
 108     if (!s->skip_flag || !s->tab_ct_depth)
 109         goto fail;
 110
 111     s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
 112     s->tab_ipm  = av_mallocz(min_pu_size);
 113     s->is_pcm   = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
 114     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 115         goto fail;
 116
 117     s->filter_slice_edges = av_mallocz(ctb_count);
 118     s->tab_slice_address  = av_malloc_array(pic_size_in_ctb,
 119                                       sizeof(*s->tab_slice_address));
 120     s->qp_y_tab           = av_malloc_array(pic_size_in_ctb,
 121                                       sizeof(*s->qp_y_tab));
 122     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 123         goto fail;
 124
 125     s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
 126     s->vertical_bs   = av_mallocz_array(s->bs_width, s->bs_height);
 127     if (!s->horizontal_bs || !s->vertical_bs)
 128         goto fail;
 129
 130     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 131                                           av_buffer_allocz);
 132     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 133                                           av_buffer_allocz);
 134     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 135         goto fail;
 136
 137     return 0;
 138
 139 fail:
 140     pic_arrays_free(s);
 141     return AVERROR(ENOMEM);
 142 }
 143
 144 static int pred_weight_table(HEVCContext *s, GetBitContext *gb)
 145 {
 146     int i = 0;
 147     int j = 0;
 148     uint8_t luma_weight_l0_flag[16];
 149     uint8_t chroma_weight_l0_flag[16];
 150     uint8_t luma_weight_l1_flag[16];
 151     uint8_t chroma_weight_l1_flag[16];
 152     int luma_log2_weight_denom;
 153
 154     luma_log2_weight_denom = get_ue_golomb_long(gb);
 155     if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7) {
 156         av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
 157         return AVERROR_INVALIDDATA;
 158     }
 159     s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
 160     if (s->ps.sps->chroma_format_idc != 0) {
 161         int64_t chroma_log2_weight_denom = luma_log2_weight_denom + (int64_t)get_se_golomb(gb);
 162         if (chroma_log2_weight_denom < 0 || chroma_log2_weight_denom > 7) {
 163             av_log(s->avctx, AV_LOG_ERROR, "chroma_log2_weight_denom %"PRId64" is invalid\n", chroma_log2_weight_denom);
 164             return AVERROR_INVALIDDATA;
 165         }
 166         s->sh.chroma_log2_weight_denom = chroma_log2_weight_denom;
 167     }
 168
 169     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 170         luma_weight_l0_flag[i] = get_bits1(gb);
 171         if (!luma_weight_l0_flag[i]) {
 172             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 173             s->sh.luma_offset_l0[i] = 0;
 174         }
 175     }
 176     if (s->ps.sps->chroma_format_idc != 0) {
 177         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 178             chroma_weight_l0_flag[i] = get_bits1(gb);
 179     } else {
 180         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 181             chroma_weight_l0_flag[i] = 0;
 182     }
 183     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 184         if (luma_weight_l0_flag[i]) {
 185             int delta_luma_weight_l0 = get_se_golomb(gb);
 186             if ((int8_t)delta_luma_weight_l0 != delta_luma_weight_l0)
 187                 return AVERROR_INVALIDDATA;
 188             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 189             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 190         }
 191         if (chroma_weight_l0_flag[i]) {
 192             for (j = 0; j < 2; j++) {
 193                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 194                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 195
 196                 if (   (int8_t)delta_chroma_weight_l0 != delta_chroma_weight_l0
 197                     || delta_chroma_offset_l0 < -(1<<17) || delta_chroma_offset_l0 > (1<<17)) {
 198                     return AVERROR_INVALIDDATA;
 199                 }
 200
 201                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 202                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 203                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 204             }
 205         } else {
 206             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 207             s->sh.chroma_offset_l0[i][0] = 0;
 208             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 209             s->sh.chroma_offset_l0[i][1] = 0;
 210         }
 211     }
 212     if (s->sh.slice_type == HEVC_SLICE_B) {
 213         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 214             luma_weight_l1_flag[i] = get_bits1(gb);
 215             if (!luma_weight_l1_flag[i]) {
 216                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 217                 s->sh.luma_offset_l1[i] = 0;
 218             }
 219         }
 220         if (s->ps.sps->chroma_format_idc != 0) {
 221             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 222                 chroma_weight_l1_flag[i] = get_bits1(gb);
 223         } else {
 224             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 225                 chroma_weight_l1_flag[i] = 0;
 226         }
 227         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 228             if (luma_weight_l1_flag[i]) {
 229                 int delta_luma_weight_l1 = get_se_golomb(gb);
 230                 if ((int8_t)delta_luma_weight_l1 != delta_luma_weight_l1)
 231                     return AVERROR_INVALIDDATA;
 232                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 233                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 234             }
 235             if (chroma_weight_l1_flag[i]) {
 236                 for (j = 0; j < 2; j++) {
 237                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 238                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 239
 240                     if (   (int8_t)delta_chroma_weight_l1 != delta_chroma_weight_l1
 241                         || delta_chroma_offset_l1 < -(1<<17) || delta_chroma_offset_l1 > (1<<17)) {
 242                         return AVERROR_INVALIDDATA;
 243                     }
 244
 245                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 246                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 247                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 248                 }
 249             } else {
 250                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 251                 s->sh.chroma_offset_l1[i][0] = 0;
 252                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 253                 s->sh.chroma_offset_l1[i][1] = 0;
 254             }
 255         }
 256     }
 257     return 0;
 258 }
 259
 260 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 261 {
 262     const HEVCSPS *sps = s->ps.sps;
 263     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 264     int prev_delta_msb = 0;
 265     unsigned int nb_sps = 0, nb_sh;
 266     int i;
 267
 268     rps->nb_refs = 0;
 269     if (!sps->long_term_ref_pics_present_flag)
 270         return 0;
 271
 272     if (sps->num_long_term_ref_pics_sps > 0)
 273         nb_sps = get_ue_golomb_long(gb);
 274     nb_sh = get_ue_golomb_long(gb);
 275
 276     if (nb_sps > sps->num_long_term_ref_pics_sps)
 277         return AVERROR_INVALIDDATA;
 278     if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
 279         return AVERROR_INVALIDDATA;
 280
 281     rps->nb_refs = nb_sh + nb_sps;
 282
 283     for (i = 0; i < rps->nb_refs; i++) {
 284
 285         if (i < nb_sps) {
 286             uint8_t lt_idx_sps = 0;
 287
 288             if (sps->num_long_term_ref_pics_sps > 1)
 289                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 290
 291             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 292             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 293         } else {
 294             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 295             rps->used[i] = get_bits1(gb);
 296         }
 297
 298         rps->poc_msb_present[i] = get_bits1(gb);
 299         if (rps->poc_msb_present[i]) {
 300             int64_t delta = get_ue_golomb_long(gb);
 301             int64_t poc;
 302
 303             if (i && i != nb_sps)
 304                 delta += prev_delta_msb;
 305
 306             poc = rps->poc[i] + s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 307             if (poc != (int32_t)poc)
 308                 return AVERROR_INVALIDDATA;
 309             rps->poc[i] = poc;
 310             prev_delta_msb = delta;
 311         }
 312     }
 313
 314     return 0;
 315 }
 316
 317 static void export_stream_params(HEVCContext *s, const HEVCSPS *sps)
 318 {
 319     AVCodecContext *avctx = s->avctx;
 320     const HEVCParamSets *ps = &s->ps;
 321     const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
 322     const HEVCWindow *ow = &sps->output_window;
 323     unsigned int num = 0, den = 0;
 324
 325     avctx->pix_fmt             = sps->pix_fmt;
 326     avctx->coded_width         = sps->width;
 327     avctx->coded_height        = sps->height;
 328     avctx->width               = sps->width  - ow->left_offset - ow->right_offset;
 329     avctx->height              = sps->height - ow->top_offset  - ow->bottom_offset;
 330     avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 331     avctx->profile             = sps->ptl.general_ptl.profile_idc;
 332     avctx->level               = sps->ptl.general_ptl.level_idc;
 333
 334     ff_set_sar(avctx, sps->vui.sar);
 335
 336     if (sps->vui.video_signal_type_present_flag)
 337         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 338                                                             : AVCOL_RANGE_MPEG;
 339     else
 340         avctx->color_range = AVCOL_RANGE_MPEG;
 341
 342     if (sps->vui.colour_description_present_flag) {
 343         avctx->color_primaries = sps->vui.colour_primaries;
 344         avctx->color_trc       = sps->vui.transfer_characteristic;
 345         avctx->colorspace      = sps->vui.matrix_coeffs;
 346     } else {
 347         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 348         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 349         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 350     }
 351
 352     avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED;
 353     if (sps->chroma_format_idc == 1) {
 354         if (sps->vui.chroma_loc_info_present_flag) {
 355             if (sps->vui.chroma_sample_loc_type_top_field <= 5)
 356                 avctx->chroma_sample_location = sps->vui.chroma_sample_loc_type_top_field + 1;
 357         } else
 358             avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
 359     }
 360
 361     if (vps->vps_timing_info_present_flag) {
 362         num = vps->vps_num_units_in_tick;
 363         den = vps->vps_time_scale;
 364     } else if (sps->vui.vui_timing_info_present_flag) {
 365         num = sps->vui.vui_num_units_in_tick;
 366         den = sps->vui.vui_time_scale;
 367     }
 368
 369     if (num != 0 && den != 0)
 370         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
 371                   num, den, 1 << 30);
 372 }
 373
 374 static int export_stream_params_from_sei(HEVCContext *s)
 375 {
 376     AVCodecContext *avctx = s->avctx;
 377
 378     if (s->sei.a53_caption.buf_ref)
 379         s->avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
 380
 381     if (s->sei.alternative_transfer.present &&
 382         av_color_transfer_name(s->sei.alternative_transfer.preferred_transfer_characteristics) &&
 383         s->sei.alternative_transfer.preferred_transfer_characteristics != AVCOL_TRC_UNSPECIFIED) {
 384         avctx->color_trc = s->sei.alternative_transfer.preferred_transfer_characteristics;
 385     }
 386
 387     return 0;
 388 }
 389
 390 static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
 391 {
 392 #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + \
 393                      CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \
 394                      CONFIG_HEVC_NVDEC_HWACCEL + \
 395                      CONFIG_HEVC_VAAPI_HWACCEL + \
 396                      CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \
 397                      CONFIG_HEVC_VDPAU_HWACCEL)
 398     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
 399
 400     switch (sps->pix_fmt) {
 401     case AV_PIX_FMT_YUV420P:
 402     case AV_PIX_FMT_YUVJ420P:
 403 #if CONFIG_HEVC_DXVA2_HWACCEL
 404         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
 405 #endif
 406 #if CONFIG_HEVC_D3D11VA_HWACCEL
 407         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
 408         *fmt++ = AV_PIX_FMT_D3D11;
 409 #endif
 410 #if CONFIG_HEVC_VAAPI_HWACCEL
 411         *fmt++ = AV_PIX_FMT_VAAPI;
 412 #endif
 413 #if CONFIG_HEVC_VDPAU_HWACCEL
 414         *fmt++ = AV_PIX_FMT_VDPAU;
 415 #endif
 416 #if CONFIG_HEVC_NVDEC_HWACCEL
 417         *fmt++ = AV_PIX_FMT_CUDA;
 418 #endif
 419 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
 420         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
 421 #endif
 422         break;
 423     case AV_PIX_FMT_YUV420P10:
 424 #if CONFIG_HEVC_DXVA2_HWACCEL
 425         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
 426 #endif
 427 #if CONFIG_HEVC_D3D11VA_HWACCEL
 428         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
 429         *fmt++ = AV_PIX_FMT_D3D11;
 430 #endif
 431 #if CONFIG_HEVC_VAAPI_HWACCEL
 432         *fmt++ = AV_PIX_FMT_VAAPI;
 433 #endif
 434 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
 435         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
 436 #endif
 437 #if CONFIG_HEVC_VDPAU_HWACCEL
 438         *fmt++ = AV_PIX_FMT_VDPAU;
 439 #endif
 440 #if CONFIG_HEVC_NVDEC_HWACCEL
 441         *fmt++ = AV_PIX_FMT_CUDA;
 442 #endif
 443         break;
 444     case AV_PIX_FMT_YUV444P:
 445 #if CONFIG_HEVC_VDPAU_HWACCEL
 446         *fmt++ = AV_PIX_FMT_VDPAU;
 447 #endif
 448 #if CONFIG_HEVC_NVDEC_HWACCEL
 449         *fmt++ = AV_PIX_FMT_CUDA;
 450 #endif
 451         break;
 452     case AV_PIX_FMT_YUV422P:
 453     case AV_PIX_FMT_YUV422P10LE:
 454 #if CONFIG_HEVC_VAAPI_HWACCEL
 455        *fmt++ = AV_PIX_FMT_VAAPI;
 456 #endif
 457         break;
 458     case AV_PIX_FMT_YUV420P12:
 459     case AV_PIX_FMT_YUV444P10:
 460     case AV_PIX_FMT_YUV444P12:
 461 #if CONFIG_HEVC_VDPAU_HWACCEL
 462         *fmt++ = AV_PIX_FMT_VDPAU;
 463 #endif
 464 #if CONFIG_HEVC_NVDEC_HWACCEL
 465         *fmt++ = AV_PIX_FMT_CUDA;
 466 #endif
 467         break;
 468     }
 469
 470     *fmt++ = sps->pix_fmt;
 471     *fmt = AV_PIX_FMT_NONE;
 472
 473     return ff_thread_get_format(s->avctx, pix_fmts);
 474 }
 475
 476 static int set_sps(HEVCContext *s, const HEVCSPS *sps,
 477                    enum AVPixelFormat pix_fmt)
 478 {
 479     int ret, i;
 480
 481     pic_arrays_free(s);
 482     s->ps.sps = NULL;
 483     s->ps.vps = NULL;
 484
 485     if (!sps)
 486         return 0;
 487
 488     ret = pic_arrays_init(s, sps);
 489     if (ret < 0)
 490         goto fail;
 491
 492     export_stream_params(s, sps);
 493
 494     s->avctx->pix_fmt = pix_fmt;
 495
 496     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 497     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 498     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 499
 500     for (i = 0; i < 3; i++) {
 501         av_freep(&s->sao_pixel_buffer_h[i]);
 502         av_freep(&s->sao_pixel_buffer_v[i]);
 503     }
 504
 505     if (sps->sao_enabled && !s->avctx->hwaccel) {
 506         int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
 507         int c_idx;
 508
 509         for(c_idx = 0; c_idx < c_count; c_idx++) {
 510             int w = sps->width >> sps->hshift[c_idx];
 511             int h = sps->height >> sps->vshift[c_idx];
 512             s->sao_pixel_buffer_h[c_idx] =
 513                 av_malloc((w * 2 * sps->ctb_height) <<
 514                           sps->pixel_shift);
 515             s->sao_pixel_buffer_v[c_idx] =
 516                 av_malloc((h * 2 * sps->ctb_width) <<
 517                           sps->pixel_shift);
 518         }
 519     }
 520
 521     s->ps.sps = sps;
 522     s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
 523
 524     return 0;
 525
 526 fail:
 527     pic_arrays_free(s);
 528     s->ps.sps = NULL;
 529     return ret;
 530 }
 531
 532 static int hls_slice_header(HEVCContext *s)
 533 {
 534     GetBitContext *gb = &s->HEVClc->gb;
 535     SliceHeader *sh   = &s->sh;
 536     int i, ret;
 537
 538     // Coded parameters
 539     sh->first_slice_in_pic_flag = get_bits1(gb);
 540     if (s->ref && sh->first_slice_in_pic_flag) {
 541         av_log(s->avctx, AV_LOG_ERROR, "Two slices reporting being the first in the same frame.\n");
 542         return 1; // This slice will be skipped later, do not corrupt state
 543     }
 544
 545     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 546         s->seq_decode = (s->seq_decode + 1) & 0xff;
 547         s->max_ra     = INT_MAX;
 548         if (IS_IDR(s))
 549             ff_hevc_clear_refs(s);
 550     }
 551     sh->no_output_of_prior_pics_flag = 0;
 552     if (IS_IRAP(s))
 553         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 554
 555     sh->pps_id = get_ue_golomb_long(gb);
 556     if (sh->pps_id >= HEVC_MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
 557         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 558         return AVERROR_INVALIDDATA;
 559     }
 560     if (!sh->first_slice_in_pic_flag &&
 561         s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
 562         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 563         return AVERROR_INVALIDDATA;
 564     }
 565     s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
 566     if (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos == 1)
 567         sh->no_output_of_prior_pics_flag = 1;
 568
 569     if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
 570         const HEVCSPS *sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
 571         const HEVCSPS *last_sps = s->ps.sps;
 572         enum AVPixelFormat pix_fmt;
 573
 574         if (last_sps && IS_IRAP(s) && s->nal_unit_type != HEVC_NAL_CRA_NUT) {
 575             if (sps->width != last_sps->width || sps->height != last_sps->height ||
 576                 sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering !=
 577                 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
 578                 sh->no_output_of_prior_pics_flag = 0;
 579         }
 580         ff_hevc_clear_refs(s);
 581
 582         ret = set_sps(s, sps, sps->pix_fmt);
 583         if (ret < 0)
 584             return ret;
 585
 586         pix_fmt = get_format(s, sps);
 587         if (pix_fmt < 0)
 588             return pix_fmt;
 589         s->avctx->pix_fmt = pix_fmt;
 590
 591         s->seq_decode = (s->seq_decode + 1) & 0xff;
 592         s->max_ra     = INT_MAX;
 593     }
 594
 595     ret = export_stream_params_from_sei(s);
 596     if (ret < 0)
 597         return ret;
 598
 599     sh->dependent_slice_segment_flag = 0;
 600     if (!sh->first_slice_in_pic_flag) {
 601         int slice_address_length;
 602
 603         if (s->ps.pps->dependent_slice_segments_enabled_flag)
 604             sh->dependent_slice_segment_flag = get_bits1(gb);
 605
 606         slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
 607                                             s->ps.sps->ctb_height);
 608         sh->slice_segment_addr = get_bitsz(gb, slice_address_length);
 609         if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
 610             av_log(s->avctx, AV_LOG_ERROR,
 611                    "Invalid slice segment address: %u.\n",
 612                    sh->slice_segment_addr);
 613             return AVERROR_INVALIDDATA;
 614         }
 615
 616         if (!sh->dependent_slice_segment_flag) {
 617             sh->slice_addr = sh->slice_segment_addr;
 618             s->slice_idx++;
 619         }
 620     } else {
 621         sh->slice_segment_addr = sh->slice_addr = 0;
 622         s->slice_idx           = 0;
 623         s->slice_initialized   = 0;
 624     }
 625
 626     if (!sh->dependent_slice_segment_flag) {
 627         s->slice_initialized = 0;
 628
 629         for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
 630             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 631
 632         sh->slice_type = get_ue_golomb_long(gb);
 633         if (!(sh->slice_type == HEVC_SLICE_I ||
 634               sh->slice_type == HEVC_SLICE_P ||
 635               sh->slice_type == HEVC_SLICE_B)) {
 636             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 637                    sh->slice_type);
 638             return AVERROR_INVALIDDATA;
 639         }
 640         if (IS_IRAP(s) && sh->slice_type != HEVC_SLICE_I) {
 641             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 642             return AVERROR_INVALIDDATA;
 643         }
 644
 645         // when flag is not present, picture is inferred to be output
 646         sh->pic_output_flag = 1;
 647         if (s->ps.pps->output_flag_present_flag)
 648             sh->pic_output_flag = get_bits1(gb);
 649
 650         if (s->ps.sps->separate_colour_plane_flag)
 651             sh->colour_plane_id = get_bits(gb, 2);
 652
 653         if (!IS_IDR(s)) {
 654             int poc, pos;
 655
 656             sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
 657             poc = ff_hevc_compute_poc(s->ps.sps, s->pocTid0, sh->pic_order_cnt_lsb, s->nal_unit_type);
 658             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 659                 av_log(s->avctx, AV_LOG_WARNING,
 660                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 661                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 662                     return AVERROR_INVALIDDATA;
 663                 poc = s->poc;
 664             }
 665             s->poc = poc;
 666
 667             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
 668             pos = get_bits_left(gb);
 669             if (!sh->short_term_ref_pic_set_sps_flag) {
 670                 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
 671                 if (ret < 0)
 672                     return ret;
 673
 674                 sh->short_term_rps = &sh->slice_rps;
 675             } else {
 676                 int numbits, rps_idx;
 677
 678                 if (!s->ps.sps->nb_st_rps) {
 679                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 680                     return AVERROR_INVALIDDATA;
 681                 }
 682
 683                 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
 684                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 685                 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
 686             }
 687             sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
 688
 689             pos = get_bits_left(gb);
 690             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 691             if (ret < 0) {
 692                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 693                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 694                     return AVERROR_INVALIDDATA;
 695             }
 696             sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
 697
 698             if (s->ps.sps->sps_temporal_mvp_enabled_flag)
 699                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 700             else
 701                 sh->slice_temporal_mvp_enabled_flag = 0;
 702         } else {
 703             s->sh.short_term_rps = NULL;
 704             s->poc               = 0;
 705         }
 706
 707         /* 8.3.1 */
 708         if (sh->first_slice_in_pic_flag && s->temporal_id == 0 &&
 709             s->nal_unit_type != HEVC_NAL_TRAIL_N &&
 710             s->nal_unit_type != HEVC_NAL_TSA_N   &&
 711             s->nal_unit_type != HEVC_NAL_STSA_N  &&
 712             s->nal_unit_type != HEVC_NAL_RADL_N  &&
 713             s->nal_unit_type != HEVC_NAL_RADL_R  &&
 714             s->nal_unit_type != HEVC_NAL_RASL_N  &&
 715             s->nal_unit_type != HEVC_NAL_RASL_R)
 716             s->pocTid0 = s->poc;
 717
 718         if (s->ps.sps->sao_enabled) {
 719             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 720             if (s->ps.sps->chroma_format_idc) {
 721                 sh->slice_sample_adaptive_offset_flag[1] =
 722                 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 723             }
 724         } else {
 725             sh->slice_sample_adaptive_offset_flag[0] = 0;
 726             sh->slice_sample_adaptive_offset_flag[1] = 0;
 727             sh->slice_sample_adaptive_offset_flag[2] = 0;
 728         }
 729
 730         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 731         if (sh->slice_type == HEVC_SLICE_P || sh->slice_type == HEVC_SLICE_B) {
 732             int nb_refs;
 733
 734             sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
 735             if (sh->slice_type == HEVC_SLICE_B)
 736                 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
 737
 738             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 739                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 740                 if (sh->slice_type == HEVC_SLICE_B)
 741                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 742             }
 743             if (sh->nb_refs[L0] > HEVC_MAX_REFS || sh->nb_refs[L1] > HEVC_MAX_REFS) {
 744                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 745                        sh->nb_refs[L0], sh->nb_refs[L1]);
 746                 return AVERROR_INVALIDDATA;
 747             }
 748
 749             sh->rpl_modification_flag[0] = 0;
 750             sh->rpl_modification_flag[1] = 0;
 751             nb_refs = ff_hevc_frame_nb_refs(s);
 752             if (!nb_refs) {
 753                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 754                 return AVERROR_INVALIDDATA;
 755             }
 756
 757             if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
 758                 sh->rpl_modification_flag[0] = get_bits1(gb);
 759                 if (sh->rpl_modification_flag[0]) {
 760                     for (i = 0; i < sh->nb_refs[L0]; i++)
 761                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 762                 }
 763
 764                 if (sh->slice_type == HEVC_SLICE_B) {
 765                     sh->rpl_modification_flag[1] = get_bits1(gb);
 766                     if (sh->rpl_modification_flag[1] == 1)
 767                         for (i = 0; i < sh->nb_refs[L1]; i++)
 768                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 769                 }
 770             }
 771
 772             if (sh->slice_type == HEVC_SLICE_B)
 773                 sh->mvd_l1_zero_flag = get_bits1(gb);
 774
 775             if (s->ps.pps->cabac_init_present_flag)
 776                 sh->cabac_init_flag = get_bits1(gb);
 777             else
 778                 sh->cabac_init_flag = 0;
 779
 780             sh->collocated_ref_idx = 0;
 781             if (sh->slice_temporal_mvp_enabled_flag) {
 782                 sh->collocated_list = L0;
 783                 if (sh->slice_type == HEVC_SLICE_B)
 784                     sh->collocated_list = !get_bits1(gb);
 785
 786                 if (sh->nb_refs[sh->collocated_list] > 1) {
 787                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 788                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 789                         av_log(s->avctx, AV_LOG_ERROR,
 790                                "Invalid collocated_ref_idx: %d.\n",
 791                                sh->collocated_ref_idx);
 792                         return AVERROR_INVALIDDATA;
 793                     }
 794                 }
 795             }
 796
 797             if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == HEVC_SLICE_P) ||
 798                 (s->ps.pps->weighted_bipred_flag && sh->slice_type == HEVC_SLICE_B)) {
 799                 int ret = pred_weight_table(s, gb);
 800                 if (ret < 0)
 801                     return ret;
 802             }
 803
 804             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 805             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 806                 av_log(s->avctx, AV_LOG_ERROR,
 807                        "Invalid number of merging MVP candidates: %d.\n",
 808                        sh->max_num_merge_cand);
 809                 return AVERROR_INVALIDDATA;
 810             }
 811         }
 812
 813         sh->slice_qp_delta = get_se_golomb(gb);
 814
 815         if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 816             sh->slice_cb_qp_offset = get_se_golomb(gb);
 817             sh->slice_cr_qp_offset = get_se_golomb(gb);
 818             if (sh->slice_cb_qp_offset < -12 || sh->slice_cb_qp_offset > 12 ||
 819                 sh->slice_cr_qp_offset < -12 || sh->slice_cr_qp_offset > 12) {
 820                 av_log(s->avctx, AV_LOG_ERROR, "Invalid slice cx qp offset.\n");
 821                 return AVERROR_INVALIDDATA;
 822             }
 823         } else {
 824             sh->slice_cb_qp_offset = 0;
 825             sh->slice_cr_qp_offset = 0;
 826         }
 827
 828         if (s->ps.pps->chroma_qp_offset_list_enabled_flag)
 829             sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
 830         else
 831             sh->cu_chroma_qp_offset_enabled_flag = 0;
 832
 833         if (s->ps.pps->deblocking_filter_control_present_flag) {
 834             int deblocking_filter_override_flag = 0;
 835
 836             if (s->ps.pps->deblocking_filter_override_enabled_flag)
 837                 deblocking_filter_override_flag = get_bits1(gb);
 838
 839             if (deblocking_filter_override_flag) {
 840                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 841                 if (!sh->disable_deblocking_filter_flag) {
 842                     int beta_offset_div2 = get_se_golomb(gb);
 843                     int tc_offset_div2   = get_se_golomb(gb) ;
 844                     if (beta_offset_div2 < -6 || beta_offset_div2 > 6 ||
 845                         tc_offset_div2   < -6 || tc_offset_div2   > 6) {
 846                         av_log(s->avctx, AV_LOG_ERROR,
 847                             "Invalid deblock filter offsets: %d, %d\n",
 848                             beta_offset_div2, tc_offset_div2);
 849                         return AVERROR_INVALIDDATA;
 850                     }
 851                     sh->beta_offset = beta_offset_div2 * 2;
 852                     sh->tc_offset   =   tc_offset_div2 * 2;
 853                 }
 854             } else {
 855                 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
 856                 sh->beta_offset                    = s->ps.pps->beta_offset;
 857                 sh->tc_offset                      = s->ps.pps->tc_offset;
 858             }
 859         } else {
 860             sh->disable_deblocking_filter_flag = 0;
 861             sh->beta_offset                    = 0;
 862             sh->tc_offset                      = 0;
 863         }
 864
 865         if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
 866             (sh->slice_sample_adaptive_offset_flag[0] ||
 867              sh->slice_sample_adaptive_offset_flag[1] ||
 868              !sh->disable_deblocking_filter_flag)) {
 869             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 870         } else {
 871             sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
 872         }
 873     } else if (!s->slice_initialized) {
 874         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 875         return AVERROR_INVALIDDATA;
 876     }
 877
 878     sh->num_entry_point_offsets = 0;
 879     if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
 880         unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
 881         // It would be possible to bound this tighter but this here is simpler
 882         if (num_entry_point_offsets > get_bits_left(gb)) {
 883             av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
 884             return AVERROR_INVALIDDATA;
 885         }
 886
 887         sh->num_entry_point_offsets = num_entry_point_offsets;
 888         if (sh->num_entry_point_offsets > 0) {
 889             int offset_len = get_ue_golomb_long(gb) + 1;
 890
 891             if (offset_len < 1 || offset_len > 32) {
 892                 sh->num_entry_point_offsets = 0;
 893                 av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
 894                 return AVERROR_INVALIDDATA;
 895             }
 896
 897             av_freep(&sh->entry_point_offset);
 898             av_freep(&sh->offset);
 899             av_freep(&sh->size);
 900             sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(unsigned));
 901             sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 902             sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 903             if (!sh->entry_point_offset || !sh->offset || !sh->size) {
 904                 sh->num_entry_point_offsets = 0;
 905                 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
 906                 return AVERROR(ENOMEM);
 907             }
 908             for (i = 0; i < sh->num_entry_point_offsets; i++) {
 909                 unsigned val = get_bits_long(gb, offset_len);
 910                 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
 911             }
 912             if (s->threads_number > 1 && (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1)) {
 913                 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
 914                 s->threads_number = 1;
 915             } else
 916                 s->enable_parallel_tiles = 0;
 917         } else
 918             s->enable_parallel_tiles = 0;
 919     }
 920
 921     if (s->ps.pps->slice_header_extension_present_flag) {
 922         unsigned int length = get_ue_golomb_long(gb);
 923         if (length*8LL > get_bits_left(gb)) {
 924             av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
 925             return AVERROR_INVALIDDATA;
 926         }
 927         for (i = 0; i < length; i++)
 928             skip_bits(gb, 8);  // slice_header_extension_data_byte
 929     }
 930
 931     // Inferred parameters
 932     sh->slice_qp = 26U + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 933     if (sh->slice_qp > 51 ||
 934         sh->slice_qp < -s->ps.sps->qp_bd_offset) {
 935         av_log(s->avctx, AV_LOG_ERROR,
 936                "The slice_qp %d is outside the valid range "
 937                "[%d, 51].\n",
 938                sh->slice_qp,
 939                -s->ps.sps->qp_bd_offset);
 940         return AVERROR_INVALIDDATA;
 941     }
 942
 943     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 944
 945     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
 946         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
 947         return AVERROR_INVALIDDATA;
 948     }
 949
 950     if (get_bits_left(gb) < 0) {
 951         av_log(s->avctx, AV_LOG_ERROR,
 952                "Overread slice header by %d bits\n", -get_bits_left(gb));
 953         return AVERROR_INVALIDDATA;
 954     }
 955
 956     s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
 957
 958     if (!s->ps.pps->cu_qp_delta_enabled_flag)
 959         s->HEVClc->qp_y = s->sh.slice_qp;
 960
 961     s->slice_initialized = 1;
 962     s->HEVClc->tu.cu_qp_offset_cb = 0;
 963     s->HEVClc->tu.cu_qp_offset_cr = 0;
 964
 965     return 0;
 966 }
 967
 968 #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
 969
 970 #define SET_SAO(elem, value)                            \
 971 do {                                                    \
 972     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 973         sao->elem = value;                              \
 974     else if (sao_merge_left_flag)                       \
 975         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 976     else if (sao_merge_up_flag)                         \
 977         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 978     else                                                \
 979         sao->elem = 0;                                  \
 980 } while (0)
 981
 982 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 983 {
 984     HEVCLocalContext *lc    = s->HEVClc;
 985     int sao_merge_left_flag = 0;
 986     int sao_merge_up_flag   = 0;
 987     SAOParams *sao          = &CTB(s->sao, rx, ry);
 988     int c_idx, i;
 989
 990     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 991         s->sh.slice_sample_adaptive_offset_flag[1]) {
 992         if (rx > 0) {
 993             if (lc->ctb_left_flag)
 994                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 995         }
 996         if (ry > 0 && !sao_merge_left_flag) {
 997             if (lc->ctb_up_flag)
 998                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 999         }
1000     }
1001
1002     for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
1003         int log2_sao_offset_scale = c_idx == 0 ? s->ps.pps->log2_sao_offset_scale_luma :
1004                                                  s->ps.pps->log2_sao_offset_scale_chroma;
1005
1006         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
1007             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
1008             continue;
1009         }
1010
1011         if (c_idx == 2) {
1012             sao->type_idx[2] = sao->type_idx[1];
1013             sao->eo_class[2] = sao->eo_class[1];
1014         } else {
1015             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
1016         }
1017
1018         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
1019             continue;
1020
1021         for (i = 0; i < 4; i++)
1022             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
1023
1024         if (sao->type_idx[c_idx] == SAO_BAND) {
1025             for (i = 0; i < 4; i++) {
1026                 if (sao->offset_abs[c_idx][i]) {
1027                     SET_SAO(offset_sign[c_idx][i],
1028                             ff_hevc_sao_offset_sign_decode(s));
1029                 } else {
1030                     sao->offset_sign[c_idx][i] = 0;
1031                 }
1032             }
1033             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
1034         } else if (c_idx != 2) {
1035             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
1036         }
1037
1038         // Inferred parameters
1039         sao->offset_val[c_idx][0] = 0;
1040         for (i = 0; i < 4; i++) {
1041             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
1042             if (sao->type_idx[c_idx] == SAO_EDGE) {
1043                 if (i > 1)
1044                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
1045             } else if (sao->offset_sign[c_idx][i]) {
1046                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
1047             }
1048             sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale;
1049         }
1050     }
1051 }
1052
1053 #undef SET_SAO
1054 #undef CTB
1055
1056 static int hls_cross_component_pred(HEVCContext *s, int idx) {
1057     HEVCLocalContext *lc    = s->HEVClc;
1058     int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
1059
1060     if (log2_res_scale_abs_plus1 !=  0) {
1061         int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
1062         lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
1063                                (1 - 2 * res_scale_sign_flag);
1064     } else {
1065         lc->tu.res_scale_val = 0;
1066     }
1067
1068
1069     return 0;
1070 }
1071
1072 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1073                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1074                               int log2_cb_size, int log2_trafo_size,
1075                               int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
1076 {
1077     HEVCLocalContext *lc = s->HEVClc;
1078     const int log2_trafo_size_c = log2_trafo_size - s->ps.sps->hshift[1];
1079     int i;
1080
1081     if (lc->cu.pred_mode == MODE_INTRA) {
1082         int trafo_size = 1 << log2_trafo_size;
1083         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1084
1085         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1086     }
1087
1088     if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
1089         (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1090         int scan_idx   = SCAN_DIAG;
1091         int scan_idx_c = SCAN_DIAG;
1092         int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
1093                          (s->ps.sps->chroma_format_idc == 2 &&
1094                          (cbf_cb[1] || cbf_cr[1]));
1095
1096         if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1097             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1098             if (lc->tu.cu_qp_delta != 0)
1099                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1100                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1101             lc->tu.is_cu_qp_delta_coded = 1;
1102
1103             if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
1104                 lc->tu.cu_qp_delta >  (25 + s->ps.sps->qp_bd_offset / 2)) {
1105                 av_log(s->avctx, AV_LOG_ERROR,
1106                        "The cu_qp_delta %d is outside the valid range "
1107                        "[%d, %d].\n",
1108                        lc->tu.cu_qp_delta,
1109                        -(26 + s->ps.sps->qp_bd_offset / 2),
1110                         (25 + s->ps.sps->qp_bd_offset / 2));
1111                 return AVERROR_INVALIDDATA;
1112             }
1113
1114             ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
1115         }
1116
1117         if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
1118             !lc->cu.cu_transquant_bypass_flag  &&  !lc->tu.is_cu_chroma_qp_offset_coded) {
1119             int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
1120             if (cu_chroma_qp_offset_flag) {
1121                 int cu_chroma_qp_offset_idx  = 0;
1122                 if (s->ps.pps->chroma_qp_offset_list_len_minus1 > 0) {
1123                     cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
1124                     av_log(s->avctx, AV_LOG_ERROR,
1125                         "cu_chroma_qp_offset_idx not yet tested.\n");
1126                 }
1127                 lc->tu.cu_qp_offset_cb = s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
1128                 lc->tu.cu_qp_offset_cr = s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
1129             } else {
1130                 lc->tu.cu_qp_offset_cb = 0;
1131                 lc->tu.cu_qp_offset_cr = 0;
1132             }
1133             lc->tu.is_cu_chroma_qp_offset_coded = 1;
1134         }
1135
1136         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1137             if (lc->tu.intra_pred_mode >= 6 &&
1138                 lc->tu.intra_pred_mode <= 14) {
1139                 scan_idx = SCAN_VERT;
1140             } else if (lc->tu.intra_pred_mode >= 22 &&
1141                        lc->tu.intra_pred_mode <= 30) {
1142                 scan_idx = SCAN_HORIZ;
1143             }
1144
1145             if (lc->tu.intra_pred_mode_c >=  6 &&
1146                 lc->tu.intra_pred_mode_c <= 14) {
1147                 scan_idx_c = SCAN_VERT;
1148             } else if (lc->tu.intra_pred_mode_c >= 22 &&
1149                        lc->tu.intra_pred_mode_c <= 30) {
1150                 scan_idx_c = SCAN_HORIZ;
1151             }
1152         }
1153
1154         lc->tu.cross_pf = 0;
1155
1156         if (cbf_luma)
1157             ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1158         if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1159             int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1160             int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1161             lc->tu.cross_pf  = (s->ps.pps->cross_component_prediction_enabled_flag && cbf_luma &&
1162                                 (lc->cu.pred_mode == MODE_INTER ||
1163                                  (lc->tu.chroma_mode_c ==  4)));
1164
1165             if (lc->tu.cross_pf) {
1166                 hls_cross_component_pred(s, 0);
1167             }
1168             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1169                 if (lc->cu.pred_mode == MODE_INTRA) {
1170                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1171                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
1172                 }
1173                 if (cbf_cb[i])
1174                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1175                                                 log2_trafo_size_c, scan_idx_c, 1);
1176                 else
1177                     if (lc->tu.cross_pf) {
1178                         ptrdiff_t stride = s->frame->linesize[1];
1179                         int hshift = s->ps.sps->hshift[1];
1180                         int vshift = s->ps.sps->vshift[1];
1181                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1182                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1183                         int size = 1 << log2_trafo_size_c;
1184
1185                         uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1186                                                               ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1187                         for (i = 0; i < (size * size); i++) {
1188                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1189                         }
1190                         s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
1191                     }
1192             }
1193
1194             if (lc->tu.cross_pf) {
1195                 hls_cross_component_pred(s, 1);
1196             }
1197             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1198                 if (lc->cu.pred_mode == MODE_INTRA) {
1199                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1200                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1201                 }
1202                 if (cbf_cr[i])
1203                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1204                                                 log2_trafo_size_c, scan_idx_c, 2);
1205                 else
1206                     if (lc->tu.cross_pf) {
1207                         ptrdiff_t stride = s->frame->linesize[2];
1208                         int hshift = s->ps.sps->hshift[2];
1209                         int vshift = s->ps.sps->vshift[2];
1210                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1211                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1212                         int size = 1 << log2_trafo_size_c;
1213
1214                         uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1215                                                           ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1216                         for (i = 0; i < (size * size); i++) {
1217                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1218                         }
1219                         s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
1220                     }
1221             }
1222         } else if (s->ps.sps->chroma_format_idc && blk_idx == 3) {
1223             int trafo_size_h = 1 << (log2_trafo_size + 1);
1224             int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1225             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1226                 if (lc->cu.pred_mode == MODE_INTRA) {
1227                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1228                                                     trafo_size_h, trafo_size_v);
1229                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1230                 }
1231                 if (cbf_cb[i])
1232                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1233                                                 log2_trafo_size, scan_idx_c, 1);
1234             }
1235             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1236                 if (lc->cu.pred_mode == MODE_INTRA) {
1237                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1238                                                 trafo_size_h, trafo_size_v);
1239                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1240                 }
1241                 if (cbf_cr[i])
1242                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1243                                                 log2_trafo_size, scan_idx_c, 2);
1244             }
1245         }
1246     } else if (s->ps.sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
1247         if (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3) {
1248             int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1249             int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1250             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1251             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1252             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1253             if (s->ps.sps->chroma_format_idc == 2) {
1254                 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1255                                                 trafo_size_h, trafo_size_v);
1256                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1257                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1258             }
1259         } else if (blk_idx == 3) {
1260             int trafo_size_h = 1 << (log2_trafo_size + 1);
1261             int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1262             ff_hevc_set_neighbour_available(s, xBase, yBase,
1263                                             trafo_size_h, trafo_size_v);
1264             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1265             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1266             if (s->ps.sps->chroma_format_idc == 2) {
1267                 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1268                                                 trafo_size_h, trafo_size_v);
1269                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1270                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
1271             }
1272         }
1273     }
1274
1275     return 0;
1276 }
1277
1278 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1279 {
1280     int cb_size          = 1 << log2_cb_size;
1281     int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1282
1283     int min_pu_width     = s->ps.sps->min_pu_width;
1284     int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1285     int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1286     int i, j;
1287
1288     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1289         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1290             s->is_pcm[i + j * min_pu_width] = 2;
1291 }
1292
1293 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1294                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1295                               int log2_cb_size, int log2_trafo_size,
1296                               int trafo_depth, int blk_idx,
1297                               const int *base_cbf_cb, const int *base_cbf_cr)
1298 {
1299     HEVCLocalContext *lc = s->HEVClc;
1300     uint8_t split_transform_flag;
1301     int cbf_cb[2];
1302     int cbf_cr[2];
1303     int ret;
1304
1305     cbf_cb[0] = base_cbf_cb[0];
1306     cbf_cb[1] = base_cbf_cb[1];
1307     cbf_cr[0] = base_cbf_cr[0];
1308     cbf_cr[1] = base_cbf_cr[1];
1309
1310     if (lc->cu.intra_split_flag) {
1311         if (trafo_depth == 1) {
1312             lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[blk_idx];
1313             if (s->ps.sps->chroma_format_idc == 3) {
1314                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1315                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[blk_idx];
1316             } else {
1317                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1318                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1319             }
1320         }
1321     } else {
1322         lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[0];
1323         lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1324         lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1325     }
1326
1327     if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1328         log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
1329         trafo_depth     < lc->cu.max_trafo_depth       &&
1330         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1331         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1332     } else {
1333         int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1334                           lc->cu.pred_mode == MODE_INTER &&
1335                           lc->cu.part_mode != PART_2Nx2N &&
1336                           trafo_depth == 0;
1337
1338         split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1339                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1340                                inter_split;
1341     }
1342
1343     if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1344         if (trafo_depth == 0 || cbf_cb[0]) {
1345             cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1346             if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1347                 cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1348             }
1349         }
1350
1351         if (trafo_depth == 0 || cbf_cr[0]) {
1352             cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1353             if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1354                 cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1355             }
1356         }
1357     }
1358
1359     if (split_transform_flag) {
1360         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1361         const int x1 = x0 + trafo_size_split;
1362         const int y1 = y0 + trafo_size_split;
1363
1364 #define SUBDIVIDE(x, y, idx)                                                    \
1365 do {                                                                            \
1366     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1367                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1368                              cbf_cb, cbf_cr);                                   \
1369     if (ret < 0)                                                                \
1370         return ret;                                                             \
1371 } while (0)
1372
1373         SUBDIVIDE(x0, y0, 0);
1374         SUBDIVIDE(x1, y0, 1);
1375         SUBDIVIDE(x0, y1, 2);
1376         SUBDIVIDE(x1, y1, 3);
1377
1378 #undef SUBDIVIDE
1379     } else {
1380         int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
1381         int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1382         int min_tu_width     = s->ps.sps->min_tb_width;
1383         int cbf_luma         = 1;
1384
1385         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1386             cbf_cb[0] || cbf_cr[0] ||
1387             (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1388             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1389         }
1390
1391         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1392                                  log2_cb_size, log2_trafo_size,
1393                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1394         if (ret < 0)
1395             return ret;
1396         // TODO: store cbf_luma somewhere else
1397         if (cbf_luma) {
1398             int i, j;
1399             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1400                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1401                     int x_tu = (x0 + j) >> log2_min_tu_size;
1402                     int y_tu = (y0 + i) >> log2_min_tu_size;
1403                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1404                 }
1405         }
1406         if (!s->sh.disable_deblocking_filter_flag) {
1407             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1408             if (s->ps.pps->transquant_bypass_enable_flag &&
1409                 lc->cu.cu_transquant_bypass_flag)
1410                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1411         }
1412     }
1413     return 0;
1414 }
1415
1416 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1417 {
1418     HEVCLocalContext *lc = s->HEVClc;
1419     GetBitContext gb;
1420     int cb_size   = 1 << log2_cb_size;
1421     ptrdiff_t stride0 = s->frame->linesize[0];
1422     ptrdiff_t stride1 = s->frame->linesize[1];
1423     ptrdiff_t stride2 = s->frame->linesize[2];
1424     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1425     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1426     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
1427
1428     int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth +
1429                          (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) +
1430                           ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) *
1431                           s->ps.sps->pcm.bit_depth_chroma;
1432     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1433     int ret;
1434
1435     if (!s->sh.disable_deblocking_filter_flag)
1436         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1437
1438     ret = init_get_bits(&gb, pcm, length);
1439     if (ret < 0)
1440         return ret;
1441
1442     s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size,     &gb, s->ps.sps->pcm.bit_depth);
1443     if (s->ps.sps->chroma_format_idc) {
1444         s->hevcdsp.put_pcm(dst1, stride1,
1445                            cb_size >> s->ps.sps->hshift[1],
1446                            cb_size >> s->ps.sps->vshift[1],
1447                            &gb, s->ps.sps->pcm.bit_depth_chroma);
1448         s->hevcdsp.put_pcm(dst2, stride2,
1449                            cb_size >> s->ps.sps->hshift[2],
1450                            cb_size >> s->ps.sps->vshift[2],
1451                            &gb, s->ps.sps->pcm.bit_depth_chroma);
1452     }
1453
1454     return 0;
1455 }
1456
1457 /**
1458  * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1459  *
1460  * @param s HEVC decoding context
1461  * @param dst target buffer for block data at block position
1462  * @param dststride stride of the dst buffer
1463  * @param ref reference picture buffer at origin (0, 0)
1464  * @param mv motion vector (relative to block position) to get pixel data from
1465  * @param x_off horizontal position of block from origin (0, 0)
1466  * @param y_off vertical position of block from origin (0, 0)
1467  * @param block_w width of block
1468  * @param block_h height of block
1469  * @param luma_weight weighting factor applied to the luma prediction
1470  * @param luma_offset additive offset applied to the luma prediction value
1471  */
1472
1473 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1474                         AVFrame *ref, const Mv *mv, int x_off, int y_off,
1475                         int block_w, int block_h, int luma_weight, int luma_offset)
1476 {
1477     HEVCLocalContext *lc = s->HEVClc;
1478     uint8_t *src         = ref->data[0];
1479     ptrdiff_t srcstride  = ref->linesize[0];
1480     int pic_width        = s->ps.sps->width;
1481     int pic_height       = s->ps.sps->height;
1482     int mx               = mv->x & 3;
1483     int my               = mv->y & 3;
1484     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1485                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1486     int idx              = ff_hevc_pel_weight[block_w];
1487
1488     x_off += mv->x >> 2;
1489     y_off += mv->y >> 2;
1490     src   += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1491
1492     if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1493         x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1494         y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1495         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1496         int offset     = QPEL_EXTRA_BEFORE * srcstride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1497         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1498
1499         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1500                                  edge_emu_stride, srcstride,
1501                                  block_w + QPEL_EXTRA,
1502                                  block_h + QPEL_EXTRA,
1503                                  x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1504                                  pic_width, pic_height);
1505         src = lc->edge_emu_buffer + buf_offset;
1506         srcstride = edge_emu_stride;
1507     }
1508
1509     if (!weight_flag)
1510         s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1511                                                       block_h, mx, my, block_w);
1512     else
1513         s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1514                                                         block_h, s->sh.luma_log2_weight_denom,
1515                                                         luma_weight, luma_offset, mx, my, block_w);
1516 }
1517
1518 /**
1519  * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1520  *
1521  * @param s HEVC decoding context
1522  * @param dst target buffer for block data at block position
1523  * @param dststride stride of the dst buffer
1524  * @param ref0 reference picture0 buffer at origin (0, 0)
1525  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1526  * @param x_off horizontal position of block from origin (0, 0)
1527  * @param y_off vertical position of block from origin (0, 0)
1528  * @param block_w width of block
1529  * @param block_h height of block
1530  * @param ref1 reference picture1 buffer at origin (0, 0)
1531  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1532  * @param current_mv current motion vector structure
1533  */
1534  static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1535                        AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1536                        int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1537 {
1538     HEVCLocalContext *lc = s->HEVClc;
1539     ptrdiff_t src0stride  = ref0->linesize[0];
1540     ptrdiff_t src1stride  = ref1->linesize[0];
1541     int pic_width        = s->ps.sps->width;
1542     int pic_height       = s->ps.sps->height;
1543     int mx0              = mv0->x & 3;
1544     int my0              = mv0->y & 3;
1545     int mx1              = mv1->x & 3;
1546     int my1              = mv1->y & 3;
1547     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1548                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1549     int x_off0           = x_off + (mv0->x >> 2);
1550     int y_off0           = y_off + (mv0->y >> 2);
1551     int x_off1           = x_off + (mv1->x >> 2);
1552     int y_off1           = y_off + (mv1->y >> 2);
1553     int idx              = ff_hevc_pel_weight[block_w];
1554
1555     uint8_t *src0  = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1556     uint8_t *src1  = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1557
1558     if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1559         x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1560         y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1561         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1562         int offset     = QPEL_EXTRA_BEFORE * src0stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1563         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1564
1565         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1566                                  edge_emu_stride, src0stride,
1567                                  block_w + QPEL_EXTRA,
1568                                  block_h + QPEL_EXTRA,
1569                                  x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1570                                  pic_width, pic_height);
1571         src0 = lc->edge_emu_buffer + buf_offset;
1572         src0stride = edge_emu_stride;
1573     }
1574
1575     if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1576         x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1577         y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1578         const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1579         int offset     = QPEL_EXTRA_BEFORE * src1stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1580         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1581
1582         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1583                                  edge_emu_stride, src1stride,
1584                                  block_w + QPEL_EXTRA,
1585                                  block_h + QPEL_EXTRA,
1586                                  x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1587                                  pic_width, pic_height);
1588         src1 = lc->edge_emu_buffer2 + buf_offset;
1589         src1stride = edge_emu_stride;
1590     }
1591
1592     s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
1593                                                 block_h, mx0, my0, block_w);
1594     if (!weight_flag)
1595         s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1596                                                        block_h, mx1, my1, block_w);
1597     else
1598         s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1599                                                          block_h, s->sh.luma_log2_weight_denom,
1600                                                          s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1601                                                          s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1602                                                          s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1603                                                          s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1604                                                          mx1, my1, block_w);
1605
1606 }
1607
1608 /**
1609  * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1610  *
1611  * @param s HEVC decoding context
1612  * @param dst1 target buffer for block data at block position (U plane)
1613  * @param dst2 target buffer for block data at block position (V plane)
1614  * @param dststride stride of the dst1 and dst2 buffers
1615  * @param ref reference picture buffer at origin (0, 0)
1616  * @param mv motion vector (relative to block position) to get pixel data from
1617  * @param x_off horizontal position of block from origin (0, 0)
1618  * @param y_off vertical position of block from origin (0, 0)
1619  * @param block_w width of block
1620  * @param block_h height of block
1621  * @param chroma_weight weighting factor applied to the chroma prediction
1622  * @param chroma_offset additive offset applied to the chroma prediction value
1623  */
1624
1625 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1626                           ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1627                           int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1628 {
1629     HEVCLocalContext *lc = s->HEVClc;
1630     int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
1631     int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
1632     const Mv *mv         = &current_mv->mv[reflist];
1633     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1634                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1635     int idx              = ff_hevc_pel_weight[block_w];
1636     int hshift           = s->ps.sps->hshift[1];
1637     int vshift           = s->ps.sps->vshift[1];
1638     intptr_t mx          = av_mod_uintp2(mv->x, 2 + hshift);
1639     intptr_t my          = av_mod_uintp2(mv->y, 2 + vshift);
1640     intptr_t _mx         = mx << (1 - hshift);
1641     intptr_t _my         = my << (1 - vshift);
1642
1643     x_off += mv->x >> (2 + hshift);
1644     y_off += mv->y >> (2 + vshift);
1645     src0  += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1646
1647     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1648         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1649         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1650         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1651         int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->ps.sps->pixel_shift));
1652         int buf_offset0 = EPEL_EXTRA_BEFORE *
1653                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1654         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1655                                  edge_emu_stride, srcstride,
1656                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1657                                  x_off - EPEL_EXTRA_BEFORE,
1658                                  y_off - EPEL_EXTRA_BEFORE,
1659                                  pic_width, pic_height);
1660
1661         src0 = lc->edge_emu_buffer + buf_offset0;
1662         srcstride = edge_emu_stride;
1663     }
1664     if (!weight_flag)
1665         s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1666                                                   block_h, _mx, _my, block_w);
1667     else
1668         s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1669                                                         block_h, s->sh.chroma_log2_weight_denom,
1670                                                         chroma_weight, chroma_offset, _mx, _my, block_w);
1671 }
1672
1673 /**
1674  * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1675  *
1676  * @param s HEVC decoding context
1677  * @param dst target buffer for block data at block position
1678  * @param dststride stride of the dst buffer
1679  * @param ref0 reference picture0 buffer at origin (0, 0)
1680  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1681  * @param x_off horizontal position of block from origin (0, 0)
1682  * @param y_off vertical position of block from origin (0, 0)
1683  * @param block_w width of block
1684  * @param block_h height of block
1685  * @param ref1 reference picture1 buffer at origin (0, 0)
1686  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1687  * @param current_mv current motion vector structure
1688  * @param cidx chroma component(cb, cr)
1689  */
1690 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1691                          int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1692 {
1693     HEVCLocalContext *lc = s->HEVClc;
1694     uint8_t *src1        = ref0->data[cidx+1];
1695     uint8_t *src2        = ref1->data[cidx+1];
1696     ptrdiff_t src1stride = ref0->linesize[cidx+1];
1697     ptrdiff_t src2stride = ref1->linesize[cidx+1];
1698     int weight_flag      = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1699                            (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1700     int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
1701     int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
1702     Mv *mv0              = &current_mv->mv[0];
1703     Mv *mv1              = &current_mv->mv[1];
1704     int hshift = s->ps.sps->hshift[1];
1705     int vshift = s->ps.sps->vshift[1];
1706
1707     intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift);
1708     intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift);
1709     intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift);
1710     intptr_t my1 = av_mod_uintp2(mv1->y, 2 + vshift);
1711     intptr_t _mx0 = mx0 << (1 - hshift);
1712     intptr_t _my0 = my0 << (1 - vshift);
1713     intptr_t _mx1 = mx1 << (1 - hshift);
1714     intptr_t _my1 = my1 << (1 - vshift);
1715
1716     int x_off0 = x_off + (mv0->x >> (2 + hshift));
1717     int y_off0 = y_off + (mv0->y >> (2 + vshift));
1718     int x_off1 = x_off + (mv1->x >> (2 + hshift));
1719     int y_off1 = y_off + (mv1->y >> (2 + vshift));
1720     int idx = ff_hevc_pel_weight[block_w];
1721     src1  += y_off0 * src1stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1722     src2  += y_off1 * src2stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1723
1724     if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1725         x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1726         y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1727         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1728         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1729         int buf_offset1 = EPEL_EXTRA_BEFORE *
1730                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1731
1732         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1733                                  edge_emu_stride, src1stride,
1734                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1735                                  x_off0 - EPEL_EXTRA_BEFORE,
1736                                  y_off0 - EPEL_EXTRA_BEFORE,
1737                                  pic_width, pic_height);
1738
1739         src1 = lc->edge_emu_buffer + buf_offset1;
1740         src1stride = edge_emu_stride;
1741     }
1742
1743     if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1744         x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1745         y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1746         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1747         int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1748         int buf_offset1 = EPEL_EXTRA_BEFORE *
1749                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1750
1751         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1752                                  edge_emu_stride, src2stride,
1753                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1754                                  x_off1 - EPEL_EXTRA_BEFORE,
1755                                  y_off1 - EPEL_EXTRA_BEFORE,
1756                                  pic_width, pic_height);
1757
1758         src2 = lc->edge_emu_buffer2 + buf_offset1;
1759         src2stride = edge_emu_stride;
1760     }
1761
1762     s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1763                                                 block_h, _mx0, _my0, block_w);
1764     if (!weight_flag)
1765         s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1766                                                        src2, src2stride, lc->tmp,
1767                                                        block_h, _mx1, _my1, block_w);
1768     else
1769         s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1770                                                          src2, src2stride, lc->tmp,
1771                                                          block_h,
1772                                                          s->sh.chroma_log2_weight_denom,
1773                                                          s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1774                                                          s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1775                                                          s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1776                                                          s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1777                                                          _mx1, _my1, block_w);
1778 }
1779
1780 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1781                                 const Mv *mv, int y0, int height)
1782 {
1783     if (s->threads_type == FF_THREAD_FRAME ) {
1784         int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1785
1786         ff_thread_await_progress(&ref->tf, y, 0);
1787     }
1788 }
1789
1790 static void hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
1791                                   int nPbH, int log2_cb_size, int part_idx,
1792                                   int merge_idx, MvField *mv)
1793 {
1794     HEVCLocalContext *lc = s->HEVClc;
1795     enum InterPredIdc inter_pred_idc = PRED_L0;
1796     int mvp_flag;
1797
1798     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1799     mv->pred_flag = 0;
1800     if (s->sh.slice_type == HEVC_SLICE_B)
1801         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1802
1803     if (inter_pred_idc != PRED_L1) {
1804         if (s->sh.nb_refs[L0])
1805             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1806
1807         mv->pred_flag = PF_L0;
1808         ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1809         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1810         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1811                                  part_idx, merge_idx, mv, mvp_flag, 0);
1812         mv->mv[0].x += lc->pu.mvd.x;
1813         mv->mv[0].y += lc->pu.mvd.y;
1814     }
1815
1816     if (inter_pred_idc != PRED_L0) {
1817         if (s->sh.nb_refs[L1])
1818             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1819
1820         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1821             AV_ZERO32(&lc->pu.mvd);
1822         } else {
1823             ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1824         }
1825
1826         mv->pred_flag += PF_L1;
1827         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1828         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1829                                  part_idx, merge_idx, mv, mvp_flag, 1);
1830         mv->mv[1].x += lc->pu.mvd.x;
1831         mv->mv[1].y += lc->pu.mvd.y;
1832     }
1833 }
1834
1835 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1836                                 int nPbW, int nPbH,
1837                                 int log2_cb_size, int partIdx, int idx)
1838 {
1839 #define POS(c_idx, x, y)                                                              \
1840     &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1841                            (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1842     HEVCLocalContext *lc = s->HEVClc;
1843     int merge_idx = 0;
1844     struct MvField current_mv = {{{ 0 }}};
1845
1846     int min_pu_width = s->ps.sps->min_pu_width;
1847
1848     MvField *tab_mvf = s->ref->tab_mvf;
1849     RefPicList  *refPicList = s->ref->refPicList;
1850     HEVCFrame *ref0 = NULL, *ref1 = NULL;
1851     uint8_t *dst0 = POS(0, x0, y0);
1852     uint8_t *dst1 = POS(1, x0, y0);
1853     uint8_t *dst2 = POS(2, x0, y0);
1854     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1855     int min_cb_width     = s->ps.sps->min_cb_width;
1856     int x_cb             = x0 >> log2_min_cb_size;
1857     int y_cb             = y0 >> log2_min_cb_size;
1858     int x_pu, y_pu;
1859     int i, j;
1860
1861     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1862
1863     if (!skip_flag)
1864         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1865
1866     if (skip_flag || lc->pu.merge_flag) {
1867         if (s->sh.max_num_merge_cand > 1)
1868             merge_idx = ff_hevc_merge_idx_decode(s);
1869         else
1870             merge_idx = 0;
1871
1872         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1873                                    partIdx, merge_idx, &current_mv);
1874     } else {
1875         hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1876                               partIdx, merge_idx, &current_mv);
1877     }
1878
1879     x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1880     y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1881
1882     for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1883         for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1884             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1885
1886     if (current_mv.pred_flag & PF_L0) {
1887         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1888         if (!ref0)
1889             return;
1890         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1891     }
1892     if (current_mv.pred_flag & PF_L1) {
1893         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1894         if (!ref1)
1895             return;
1896         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1897     }
1898
1899     if (current_mv.pred_flag == PF_L0) {
1900         int x0_c = x0 >> s->ps.sps->hshift[1];
1901         int y0_c = y0 >> s->ps.sps->vshift[1];
1902         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1903         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1904
1905         luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1906                     &current_mv.mv[0], x0, y0, nPbW, nPbH,
1907                     s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1908                     s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1909
1910         if (s->ps.sps->chroma_format_idc) {
1911             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1912                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1913                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1914             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1915                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1916                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1917         }
1918     } else if (current_mv.pred_flag == PF_L1) {
1919         int x0_c = x0 >> s->ps.sps->hshift[1];
1920         int y0_c = y0 >> s->ps.sps->vshift[1];
1921         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1922         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1923
1924         luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1925                     &current_mv.mv[1], x0, y0, nPbW, nPbH,
1926                     s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1927                     s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1928
1929         if (s->ps.sps->chroma_format_idc) {
1930             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1931                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1932                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1933
1934             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1935                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1936                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1937         }
1938     } else if (current_mv.pred_flag == PF_BI) {
1939         int x0_c = x0 >> s->ps.sps->hshift[1];
1940         int y0_c = y0 >> s->ps.sps->vshift[1];
1941         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1942         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1943
1944         luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1945                    &current_mv.mv[0], x0, y0, nPbW, nPbH,
1946                    ref1->frame, &current_mv.mv[1], &current_mv);
1947
1948         if (s->ps.sps->chroma_format_idc) {
1949             chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1950                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0);
1951
1952             chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1953                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 1);
1954         }
1955     }
1956 }
1957
1958 /**
1959  * 8.4.1
1960  */
1961 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1962                                 int prev_intra_luma_pred_flag)
1963 {
1964     HEVCLocalContext *lc = s->HEVClc;
1965     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
1966     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
1967     int min_pu_width     = s->ps.sps->min_pu_width;
1968     int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
1969     int x0b              = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
1970     int y0b              = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
1971
1972     int cand_up   = (lc->ctb_up_flag || y0b) ?
1973                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1974     int cand_left = (lc->ctb_left_flag || x0b) ?
1975                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1976
1977     int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1978
1979     MvField *tab_mvf = s->ref->tab_mvf;
1980     int intra_pred_mode;
1981     int candidate[3];
1982     int i, j;
1983
1984     // intra_pred_mode prediction does not cross vertical CTB boundaries
1985     if ((y0 - 1) < y_ctb)
1986         cand_up = INTRA_DC;
1987
1988     if (cand_left == cand_up) {
1989         if (cand_left < 2) {
1990             candidate[0] = INTRA_PLANAR;
1991             candidate[1] = INTRA_DC;
1992             candidate[2] = INTRA_ANGULAR_26;
1993         } else {
1994             candidate[0] = cand_left;
1995             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1996             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1997         }
1998     } else {
1999         candidate[0] = cand_left;
2000         candidate[1] = cand_up;
2001         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
2002             candidate[2] = INTRA_PLANAR;
2003         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
2004             candidate[2] = INTRA_DC;
2005         } else {
2006             candidate[2] = INTRA_ANGULAR_26;
2007         }
2008     }
2009
2010     if (prev_intra_luma_pred_flag) {
2011         intra_pred_mode = candidate[lc->pu.mpm_idx];
2012     } else {
2013         if (candidate[0] > candidate[1])
2014             FFSWAP(uint8_t, candidate[0], candidate[1]);
2015         if (candidate[0] > candidate[2])
2016             FFSWAP(uint8_t, candidate[0], candidate[2]);
2017         if (candidate[1] > candidate[2])
2018             FFSWAP(uint8_t, candidate[1], candidate[2]);
2019
2020         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
2021         for (i = 0; i < 3; i++)
2022             if (intra_pred_mode >= candidate[i])
2023                 intra_pred_mode++;
2024     }
2025
2026     /* write the intra prediction units into the mv array */
2027     if (!size_in_pus)
2028         size_in_pus = 1;
2029     for (i = 0; i < size_in_pus; i++) {
2030         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
2031                intra_pred_mode, size_in_pus);
2032
2033         for (j = 0; j < size_in_pus; j++) {
2034             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
2035         }
2036     }
2037
2038     return intra_pred_mode;
2039 }
2040
2041 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
2042                                           int log2_cb_size, int ct_depth)
2043 {
2044     int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
2045     int x_cb   = x0 >> s->ps.sps->log2_min_cb_size;
2046     int y_cb   = y0 >> s->ps.sps->log2_min_cb_size;
2047     int y;
2048
2049     for (y = 0; y < length; y++)
2050         memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
2051                ct_depth, length);
2052 }
2053
2054 static const uint8_t tab_mode_idx[] = {
2055      0,  1,  2,  2,  2,  2,  3,  5,  7,  8, 10, 12, 13, 15, 17, 18, 19, 20,
2056     21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31};
2057
2058 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
2059                                   int log2_cb_size)
2060 {
2061     HEVCLocalContext *lc = s->HEVClc;
2062     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
2063     uint8_t prev_intra_luma_pred_flag[4];
2064     int split   = lc->cu.part_mode == PART_NxN;
2065     int pb_size = (1 << log2_cb_size) >> split;
2066     int side    = split + 1;
2067     int chroma_mode;
2068     int i, j;
2069
2070     for (i = 0; i < side; i++)
2071         for (j = 0; j < side; j++)
2072             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
2073
2074     for (i = 0; i < side; i++) {
2075         for (j = 0; j < side; j++) {
2076             if (prev_intra_luma_pred_flag[2 * i + j])
2077                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2078             else
2079                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2080
2081             lc->pu.intra_pred_mode[2 * i + j] =
2082                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2083                                      prev_intra_luma_pred_flag[2 * i + j]);
2084         }
2085     }
2086
2087     if (s->ps.sps->chroma_format_idc == 3) {
2088         for (i = 0; i < side; i++) {
2089             for (j = 0; j < side; j++) {
2090                 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2091                 if (chroma_mode != 4) {
2092                     if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
2093                         lc->pu.intra_pred_mode_c[2 * i + j] = 34;
2094                     else
2095                         lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
2096                 } else {
2097                     lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
2098                 }
2099             }
2100         }
2101     } else if (s->ps.sps->chroma_format_idc == 2) {
2102         int mode_idx;
2103         lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2104         if (chroma_mode != 4) {
2105             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2106                 mode_idx = 34;
2107             else
2108                 mode_idx = intra_chroma_table[chroma_mode];
2109         } else {
2110             mode_idx = lc->pu.intra_pred_mode[0];
2111         }
2112         lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
2113     } else if (s->ps.sps->chroma_format_idc != 0) {
2114         chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2115         if (chroma_mode != 4) {
2116             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2117                 lc->pu.intra_pred_mode_c[0] = 34;
2118             else
2119                 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
2120         } else {
2121             lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
2122         }
2123     }
2124 }
2125
2126 static void intra_prediction_unit_default_value(HEVCContext *s,
2127                                                 int x0, int y0,
2128                                                 int log2_cb_size)
2129 {
2130     HEVCLocalContext *lc = s->HEVClc;
2131     int pb_size          = 1 << log2_cb_size;
2132     int size_in_pus      = pb_size >> s->ps.sps->log2_min_pu_size;
2133     int min_pu_width     = s->ps.sps->min_pu_width;
2134     MvField *tab_mvf     = s->ref->tab_mvf;
2135     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
2136     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
2137     int j, k;
2138
2139     if (size_in_pus == 0)
2140         size_in_pus = 1;
2141     for (j = 0; j < size_in_pus; j++)
2142         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2143     if (lc->cu.pred_mode == MODE_INTRA)
2144         for (j = 0; j < size_in_pus; j++)
2145             for (k = 0; k < size_in_pus; k++)
2146                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
2147 }
2148
2149 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2150 {
2151     int cb_size          = 1 << log2_cb_size;
2152     HEVCLocalContext *lc = s->HEVClc;
2153     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
2154     int length           = cb_size >> log2_min_cb_size;
2155     int min_cb_width     = s->ps.sps->min_cb_width;
2156     int x_cb             = x0 >> log2_min_cb_size;
2157     int y_cb             = y0 >> log2_min_cb_size;
2158     int idx              = log2_cb_size - 2;
2159     int qp_block_mask    = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2160     int x, y, ret;
2161
2162     lc->cu.x                = x0;
2163     lc->cu.y                = y0;
2164     lc->cu.pred_mode        = MODE_INTRA;
2165     lc->cu.part_mode        = PART_2Nx2N;
2166     lc->cu.intra_split_flag = 0;
2167
2168     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2169     for (x = 0; x < 4; x++)
2170         lc->pu.intra_pred_mode[x] = 1;
2171     if (s->ps.pps->transquant_bypass_enable_flag) {
2172         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2173         if (lc->cu.cu_transquant_bypass_flag)
2174             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2175     } else
2176         lc->cu.cu_transquant_bypass_flag = 0;
2177
2178     if (s->sh.slice_type != HEVC_SLICE_I) {
2179         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2180
2181         x = y_cb * min_cb_width + x_cb;
2182         for (y = 0; y < length; y++) {
2183             memset(&s->skip_flag[x], skip_flag, length);
2184             x += min_cb_width;
2185         }
2186         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2187     } else {
2188         x = y_cb * min_cb_width + x_cb;
2189         for (y = 0; y < length; y++) {
2190             memset(&s->skip_flag[x], 0, length);
2191             x += min_cb_width;
2192         }
2193     }
2194
2195     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2196         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2197         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2198
2199         if (!s->sh.disable_deblocking_filter_flag)
2200             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2201     } else {
2202         int pcm_flag = 0;
2203
2204         if (s->sh.slice_type != HEVC_SLICE_I)
2205             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2206         if (lc->cu.pred_mode != MODE_INTRA ||
2207             log2_cb_size == s->ps.sps->log2_min_cb_size) {
2208             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2209             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2210                                       lc->cu.pred_mode == MODE_INTRA;
2211         }
2212
2213         if (lc->cu.pred_mode == MODE_INTRA) {
2214             if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2215                 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2216                 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2217                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2218             }
2219             if (pcm_flag) {
2220                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2221                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2222                 if (s->ps.sps->pcm.loop_filter_disable_flag)
2223                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2224
2225                 if (ret < 0)
2226                     return ret;
2227             } else {
2228                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2229             }
2230         } else {
2231             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2232             switch (lc->cu.part_mode) {
2233             case PART_2Nx2N:
2234                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2235                 break;
2236             case PART_2NxN:
2237                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0, idx);
2238                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2239                 break;
2240             case PART_Nx2N:
2241                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2242                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2243                 break;
2244             case PART_2NxnU:
2245                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0, idx);
2246                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2247                 break;
2248             case PART_2NxnD:
2249                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2250                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1, idx);
2251                 break;
2252             case PART_nLx2N:
2253                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0, idx - 2);
2254                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2255                 break;
2256             case PART_nRx2N:
2257                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2258                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1, idx - 2);
2259                 break;
2260             case PART_NxN:
2261                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2262                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2263                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2264                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
2265                 break;
2266             }
2267         }
2268
2269         if (!pcm_flag) {
2270             int rqt_root_cbf = 1;
2271
2272             if (lc->cu.pred_mode != MODE_INTRA &&
2273                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2274                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2275             }
2276             if (rqt_root_cbf) {
2277                 const static int cbf[2] = { 0 };
2278                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2279                                          s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2280                                          s->ps.sps->max_transform_hierarchy_depth_inter;
2281                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2282                                          log2_cb_size,
2283                                          log2_cb_size, 0, 0, cbf, cbf);
2284                 if (ret < 0)
2285                     return ret;
2286             } else {
2287                 if (!s->sh.disable_deblocking_filter_flag)
2288                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2289             }
2290         }
2291     }
2292
2293     if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2294         ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
2295
2296     x = y_cb * min_cb_width + x_cb;
2297     for (y = 0; y < length; y++) {
2298         memset(&s->qp_y_tab[x], lc->qp_y, length);
2299         x += min_cb_width;
2300     }
2301
2302     if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2303        ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2304         lc->qPy_pred = lc->qp_y;
2305     }
2306
2307     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
2308
2309     return 0;
2310 }
2311
2312 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2313                                int log2_cb_size, int cb_depth)
2314 {
2315     HEVCLocalContext *lc = s->HEVClc;
2316     const int cb_size    = 1 << log2_cb_size;
2317     int ret;
2318     int split_cu;
2319
2320     lc->ct_depth = cb_depth;
2321     if (x0 + cb_size <= s->ps.sps->width  &&
2322         y0 + cb_size <= s->ps.sps->height &&
2323         log2_cb_size > s->ps.sps->log2_min_cb_size) {
2324         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2325     } else {
2326         split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
2327     }
2328     if (s->ps.pps->cu_qp_delta_enabled_flag &&
2329         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2330         lc->tu.is_cu_qp_delta_coded = 0;
2331         lc->tu.cu_qp_delta          = 0;
2332     }
2333
2334     if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2335         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_chroma_qp_offset_depth) {
2336         lc->tu.is_cu_chroma_qp_offset_coded = 0;
2337     }
2338
2339     if (split_cu) {
2340         int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2341         const int cb_size_split = cb_size >> 1;
2342         const int x1 = x0 + cb_size_split;
2343         const int y1 = y0 + cb_size_split;
2344
2345         int more_data = 0;
2346
2347         more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2348         if (more_data < 0)
2349             return more_data;
2350
2351         if (more_data && x1 < s->ps.sps->width) {
2352             more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2353             if (more_data < 0)
2354                 return more_data;
2355         }
2356         if (more_data && y1 < s->ps.sps->height) {
2357             more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2358             if (more_data < 0)
2359                 return more_data;
2360         }
2361         if (more_data && x1 < s->ps.sps->width &&
2362             y1 < s->ps.sps->height) {
2363             more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
2364             if (more_data < 0)
2365                 return more_data;
2366         }
2367
2368         if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2369             ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2370             lc->qPy_pred = lc->qp_y;
2371
2372         if (more_data)
2373             return ((x1 + cb_size_split) < s->ps.sps->width ||
2374                     (y1 + cb_size_split) < s->ps.sps->height);
2375         else
2376             return 0;
2377     } else {
2378         ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2379         if (ret < 0)
2380             return ret;
2381         if ((!((x0 + cb_size) %
2382                (1 << (s->ps.sps->log2_ctb_size))) ||
2383              (x0 + cb_size >= s->ps.sps->width)) &&
2384             (!((y0 + cb_size) %
2385                (1 << (s->ps.sps->log2_ctb_size))) ||
2386              (y0 + cb_size >= s->ps.sps->height))) {
2387             int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2388             return !end_of_slice_flag;
2389         } else {
2390             return 1;
2391         }
2392     }
2393
2394     return 0;
2395 }
2396
2397 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2398                                  int ctb_addr_ts)
2399 {
2400     HEVCLocalContext *lc  = s->HEVClc;
2401     int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
2402     int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2403     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2404
2405     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2406
2407     if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2408         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2409             lc->first_qp_group = 1;
2410         lc->end_of_tiles_x = s->ps.sps->width;
2411     } else if (s->ps.pps->tiles_enabled_flag) {
2412         if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2413             int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2414             lc->end_of_tiles_x   = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2415             lc->first_qp_group   = 1;
2416         }
2417     } else {
2418         lc->end_of_tiles_x = s->ps.sps->width;
2419     }
2420
2421     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
2422
2423     lc->boundary_flags = 0;
2424     if (s->ps.pps->tiles_enabled_flag) {
2425         if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2426             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2427         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2428             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2429         if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2430             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2431         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2432             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2433     } else {
2434         if (ctb_addr_in_slice <= 0)
2435             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2436         if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2437             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2438     }
2439
2440     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2441     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2442     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2443     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
2444 }
2445
2446 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2447 {
2448     HEVCContext *s  = avctxt->priv_data;
2449     int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
2450     int more_data   = 1;
2451     int x_ctb       = 0;
2452     int y_ctb       = 0;
2453     int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2454     int ret;
2455
2456     if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2457         av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2458         return AVERROR_INVALIDDATA;
2459     }
2460
2461     if (s->sh.dependent_slice_segment_flag) {
2462         int prev_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2463         if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2464             av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2465             return AVERROR_INVALIDDATA;
2466         }
2467     }
2468
2469     while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2470         int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2471
2472         x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2473         y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2474         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2475
2476         ret = ff_hevc_cabac_init(s, ctb_addr_ts, 0);
2477         if (ret < 0) {
2478             s->tab_slice_address[ctb_addr_rs] = -1;
2479             return ret;
2480         }
2481
2482         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2483
2484         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2485         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2486         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2487
2488         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2489         if (more_data < 0) {
2490             s->tab_slice_address[ctb_addr_rs] = -1;
2491             return more_data;
2492         }
2493
2494
2495         ctb_addr_ts++;
2496         ff_hevc_save_states(s, ctb_addr_ts);
2497         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2498     }
2499
2500     if (x_ctb + ctb_size >= s->ps.sps->width &&
2501         y_ctb + ctb_size >= s->ps.sps->height)
2502         ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2503
2504     return ctb_addr_ts;
2505 }
2506
2507 static int hls_slice_data(HEVCContext *s)
2508 {
2509     int arg[2];
2510     int ret[2];
2511
2512     arg[0] = 0;
2513     arg[1] = 1;
2514
2515     s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
2516     return ret[0];
2517 }
2518 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2519 {
2520     HEVCContext *s1  = avctxt->priv_data, *s;
2521     HEVCLocalContext *lc;
2522     int ctb_size    = 1<< s1->ps.sps->log2_ctb_size;
2523     int more_data   = 1;
2524     int *ctb_row_p    = input_ctb_row;
2525     int ctb_row = ctb_row_p[job];
2526     int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->ps.sps->width + ctb_size - 1) >> s1->ps.sps->log2_ctb_size);
2527     int ctb_addr_ts = s1->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2528     int thread = ctb_row % s1->threads_number;
2529     int ret;
2530
2531     s = s1->sList[self_id];
2532     lc = s->HEVClc;
2533
2534     if(ctb_row) {
2535         ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2536         if (ret < 0)
2537             goto error;
2538         ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2539     }
2540
2541     while(more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2542         int x_ctb = (ctb_addr_rs % s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2543         int y_ctb = (ctb_addr_rs / s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2544
2545         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2546
2547         ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
2548
2549         if (atomic_load(&s1->wpp_err)) {
2550             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2551             return 0;
2552         }
2553
2554         ret = ff_hevc_cabac_init(s, ctb_addr_ts, thread);
2555         if (ret < 0)
2556             goto error;
2557         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2558         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2559
2560         if (more_data < 0) {
2561             ret = more_data;
2562             goto error;
2563         }
2564
2565         ctb_addr_ts++;
2566
2567         ff_hevc_save_states(s, ctb_addr_ts);
2568         ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2569         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2570
2571         if (!more_data && (x_ctb+ctb_size) < s->ps.sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2572             atomic_store(&s1->wpp_err, 1);
2573             ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2574             return 0;
2575         }
2576
2577         if ((x_ctb+ctb_size) >= s->ps.sps->width && (y_ctb+ctb_size) >= s->ps.sps->height ) {
2578             ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2579             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2580             return ctb_addr_ts;
2581         }
2582         ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2583         x_ctb+=ctb_size;
2584
2585         if(x_ctb >= s->ps.sps->width) {
2586             break;
2587         }
2588     }
2589     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2590
2591     return 0;
2592 error:
2593     s->tab_slice_address[ctb_addr_rs] = -1;
2594     atomic_store(&s1->wpp_err, 1);
2595     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2596     return ret;
2597 }
2598
2599 static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal)
2600 {
2601     const uint8_t *data = nal->data;
2602     int length          = nal->size;
2603     HEVCLocalContext *lc = s->HEVClc;
2604     int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2605     int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2606     int64_t offset;
2607     int64_t startheader, cmpt = 0;
2608     int i, j, res = 0;
2609
2610     if (!ret || !arg) {
2611         av_free(ret);
2612         av_free(arg);
2613         return AVERROR(ENOMEM);
2614     }
2615
2616     if (s->sh.slice_ctb_addr_rs + s->sh.num_entry_point_offsets * s->ps.sps->ctb_width >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
2617         av_log(s->avctx, AV_LOG_ERROR, "WPP ctb addresses are wrong (%d %d %d %d)\n",
2618             s->sh.slice_ctb_addr_rs, s->sh.num_entry_point_offsets,
2619             s->ps.sps->ctb_width, s->ps.sps->ctb_height
2620         );
2621         res = AVERROR_INVALIDDATA;
2622         goto error;
2623     }
2624
2625     ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
2626
2627     if (!s->sList[1]) {
2628         for (i = 1; i < s->threads_number; i++) {
2629             s->sList[i] = av_malloc(sizeof(HEVCContext));
2630             memcpy(s->sList[i], s, sizeof(HEVCContext));
2631             s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2632             s->sList[i]->HEVClc = s->HEVClcList[i];
2633         }
2634     }
2635
2636     offset = (lc->gb.index >> 3);
2637
2638     for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) {
2639         if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2640             startheader--;
2641             cmpt++;
2642         }
2643     }
2644
2645     for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2646         offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2647         for (j = 0, cmpt = 0, startheader = offset
2648              + s->sh.entry_point_offset[i]; j < nal->skipped_bytes; j++) {
2649             if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2650                 startheader--;
2651                 cmpt++;
2652             }
2653         }
2654         s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2655         s->sh.offset[i - 1] = offset;
2656
2657     }
2658     if (s->sh.num_entry_point_offsets != 0) {
2659         offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2660         if (length < offset) {
2661             av_log(s->avctx, AV_LOG_ERROR, "entry_point_offset table is corrupted\n");
2662             res = AVERROR_INVALIDDATA;
2663             goto error;
2664         }
2665         s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2666         s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
2667
2668     }
2669     s->data = data;
2670
2671     for (i = 1; i < s->threads_number; i++) {
2672         s->sList[i]->HEVClc->first_qp_group = 1;
2673         s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2674         memcpy(s->sList[i], s, sizeof(HEVCContext));
2675         s->sList[i]->HEVClc = s->HEVClcList[i];
2676     }
2677
2678     atomic_store(&s->wpp_err, 0);
2679     ff_reset_entries(s->avctx);
2680
2681     for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2682         arg[i] = i;
2683         ret[i] = 0;
2684     }
2685
2686     if (s->ps.pps->entropy_coding_sync_enabled_flag)
2687         s->avctx->execute2(s->avctx, hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2688
2689     for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
2690         res += ret[i];
2691 error:
2692     av_free(ret);
2693     av_free(arg);
2694     return res;
2695 }
2696
2697 static int set_side_data(HEVCContext *s)
2698 {
2699     AVFrame *out = s->ref->frame;
2700
2701     if (s->sei.frame_packing.present &&
2702         s->sei.frame_packing.arrangement_type >= 3 &&
2703         s->sei.frame_packing.arrangement_type <= 5 &&
2704         s->sei.frame_packing.content_interpretation_type > 0 &&
2705         s->sei.frame_packing.content_interpretation_type < 3) {
2706         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2707         if (!stereo)
2708             return AVERROR(ENOMEM);
2709
2710         switch (s->sei.frame_packing.arrangement_type) {
2711         case 3:
2712             if (s->sei.frame_packing.quincunx_subsampling)
2713                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2714             else
2715                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2716             break;
2717         case 4:
2718             stereo->type = AV_STEREO3D_TOPBOTTOM;
2719             break;
2720         case 5:
2721             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2722             break;
2723         }
2724
2725         if (s->sei.frame_packing.content_interpretation_type == 2)
2726             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2727
2728         if (s->sei.frame_packing.arrangement_type == 5) {
2729             if (s->sei.frame_packing.current_frame_is_frame0_flag)
2730                 stereo->view = AV_STEREO3D_VIEW_LEFT;
2731             else
2732                 stereo->view = AV_STEREO3D_VIEW_RIGHT;
2733         }
2734     }
2735
2736     if (s->sei.display_orientation.present &&
2737         (s->sei.display_orientation.anticlockwise_rotation ||
2738          s->sei.display_orientation.hflip || s->sei.display_orientation.vflip)) {
2739         double angle = s->sei.display_orientation.anticlockwise_rotation * 360 / (double) (1 << 16);
2740         AVFrameSideData *rotation = av_frame_new_side_data(out,
2741                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2742                                                            sizeof(int32_t) * 9);
2743         if (!rotation)
2744             return AVERROR(ENOMEM);
2745
2746         av_display_rotation_set((int32_t *)rotation->data, angle);
2747         av_display_matrix_flip((int32_t *)rotation->data,
2748                                s->sei.display_orientation.hflip,
2749                                s->sei.display_orientation.vflip);
2750     }
2751
2752     // Decrement the mastering display flag when IRAP frame has no_rasl_output_flag=1
2753     // so the side data persists for the entire coded video sequence.
2754     if (s->sei.mastering_display.present > 0 &&
2755         IS_IRAP(s) && s->no_rasl_output_flag) {
2756         s->sei.mastering_display.present--;
2757     }
2758     if (s->sei.mastering_display.present) {
2759         // HEVC uses a g,b,r ordering, which we convert to a more natural r,g,b
2760         const int mapping[3] = {2, 0, 1};
2761         const int chroma_den = 50000;
2762         const int luma_den = 10000;
2763         int i;
2764         AVMasteringDisplayMetadata *metadata =
2765             av_mastering_display_metadata_create_side_data(out);
2766         if (!metadata)
2767             return AVERROR(ENOMEM);
2768
2769         for (i = 0; i < 3; i++) {
2770             const int j = mapping[i];
2771             metadata->display_primaries[i][0].num = s->sei.mastering_display.display_primaries[j][0];
2772             metadata->display_primaries[i][0].den = chroma_den;
2773             metadata->display_primaries[i][1].num = s->sei.mastering_display.display_primaries[j][1];
2774             metadata->display_primaries[i][1].den = chroma_den;
2775         }
2776         metadata->white_point[0].num = s->sei.mastering_display.white_point[0];
2777         metadata->white_point[0].den = chroma_den;
2778         metadata->white_point[1].num = s->sei.mastering_display.white_point[1];
2779         metadata->white_point[1].den = chroma_den;
2780
2781         metadata->max_luminance.num = s->sei.mastering_display.max_luminance;
2782         metadata->max_luminance.den = luma_den;
2783         metadata->min_luminance.num = s->sei.mastering_display.min_luminance;
2784         metadata->min_luminance.den = luma_den;
2785         metadata->has_luminance = 1;
2786         metadata->has_primaries = 1;
2787
2788         av_log(s->avctx, AV_LOG_DEBUG, "Mastering Display Metadata:\n");
2789         av_log(s->avctx, AV_LOG_DEBUG,
2790                "r(%5.4f,%5.4f) g(%5.4f,%5.4f) b(%5.4f %5.4f) wp(%5.4f, %5.4f)\n",
2791                av_q2d(metadata->display_primaries[0][0]),
2792                av_q2d(metadata->display_primaries[0][1]),
2793                av_q2d(metadata->display_primaries[1][0]),
2794                av_q2d(metadata->display_primaries[1][1]),
2795                av_q2d(metadata->display_primaries[2][0]),
2796                av_q2d(metadata->display_primaries[2][1]),
2797                av_q2d(metadata->white_point[0]), av_q2d(metadata->white_point[1]));
2798         av_log(s->avctx, AV_LOG_DEBUG,
2799                "min_luminance=%f, max_luminance=%f\n",
2800                av_q2d(metadata->min_luminance), av_q2d(metadata->max_luminance));
2801     }
2802     // Decrement the mastering display flag when IRAP frame has no_rasl_output_flag=1
2803     // so the side data persists for the entire coded video sequence.
2804     if (s->sei.content_light.present > 0 &&
2805         IS_IRAP(s) && s->no_rasl_output_flag) {
2806         s->sei.content_light.present--;
2807     }
2808     if (s->sei.content_light.present) {
2809         AVContentLightMetadata *metadata =
2810             av_content_light_metadata_create_side_data(out);
2811         if (!metadata)
2812             return AVERROR(ENOMEM);
2813         metadata->MaxCLL  = s->sei.content_light.max_content_light_level;
2814         metadata->MaxFALL = s->sei.content_light.max_pic_average_light_level;
2815
2816         av_log(s->avctx, AV_LOG_DEBUG, "Content Light Level Metadata:\n");
2817         av_log(s->avctx, AV_LOG_DEBUG, "MaxCLL=%d, MaxFALL=%d\n",
2818                metadata->MaxCLL, metadata->MaxFALL);
2819     }
2820
2821     if (s->sei.a53_caption.buf_ref) {
2822         HEVCSEIA53Caption *a53 = &s->sei.a53_caption;
2823
2824         AVFrameSideData *sd = av_frame_new_side_data_from_buf(out, AV_FRAME_DATA_A53_CC, a53->buf_ref);
2825         if (!sd)
2826             av_buffer_unref(&a53->buf_ref);
2827         a53->buf_ref = NULL;
2828     }
2829
2830     for (int i = 0; i < s->sei.unregistered.nb_buf_ref; i++) {
2831         HEVCSEIUnregistered *unreg = &s->sei.unregistered;
2832
2833         if (unreg->buf_ref[i]) {
2834             AVFrameSideData *sd = av_frame_new_side_data_from_buf(out,
2835                     AV_FRAME_DATA_SEI_UNREGISTERED,
2836                     unreg->buf_ref[i]);
2837             if (!sd)
2838                 av_buffer_unref(&unreg->buf_ref[i]);
2839             unreg->buf_ref[i] = NULL;
2840         }
2841     }
2842     s->sei.unregistered.nb_buf_ref = 0;
2843
2844     if (s->sei.timecode.present) {
2845         uint32_t *tc_sd;
2846         char tcbuf[AV_TIMECODE_STR_SIZE];
2847         AVFrameSideData *tcside = av_frame_new_side_data(out, AV_FRAME_DATA_S12M_TIMECODE,
2848                                                          sizeof(uint32_t) * 4);
2849         if (!tcside)
2850             return AVERROR(ENOMEM);
2851
2852         tc_sd = (uint32_t*)tcside->data;
2853         tc_sd[0] = s->sei.timecode.num_clock_ts;
2854
2855         for (int i = 0; i < tc_sd[0]; i++) {
2856             int drop = s->sei.timecode.cnt_dropped_flag[i];
2857             int   hh = s->sei.timecode.hours_value[i];
2858             int   mm = s->sei.timecode.minutes_value[i];
2859             int   ss = s->sei.timecode.seconds_value[i];
2860             int   ff = s->sei.timecode.n_frames[i];
2861
2862             tc_sd[i + 1] = av_timecode_get_smpte(s->avctx->framerate, drop, hh, mm, ss, ff);
2863             av_timecode_make_smpte_tc_string2(tcbuf, s->avctx->framerate, tc_sd[i + 1], 0, 0);
2864             av_dict_set(&out->metadata, "timecode", tcbuf, 0);
2865         }
2866
2867         s->sei.timecode.num_clock_ts = 0;
2868     }
2869
2870     if (s->sei.dynamic_hdr_plus.info) {
2871         AVBufferRef *info_ref = av_buffer_ref(s->sei.dynamic_hdr_plus.info);
2872         if (!info_ref)
2873             return AVERROR(ENOMEM);
2874
2875         if (!av_frame_new_side_data_from_buf(out, AV_FRAME_DATA_DYNAMIC_HDR_PLUS, info_ref)) {
2876             av_buffer_unref(&info_ref);
2877             return AVERROR(ENOMEM);
2878         }
2879     }
2880
2881     return 0;
2882 }
2883
2884 static int hevc_frame_start(HEVCContext *s)
2885 {
2886     HEVCLocalContext *lc = s->HEVClc;
2887     int pic_size_in_ctb  = ((s->ps.sps->width  >> s->ps.sps->log2_min_cb_size) + 1) *
2888                            ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
2889     int ret;
2890
2891     memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2892     memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
2893     memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2894     memset(s->is_pcm,        0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
2895     memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2896
2897     s->is_decoded        = 0;
2898     s->first_nal_type    = s->nal_unit_type;
2899
2900     s->no_rasl_output_flag = IS_IDR(s) || IS_BLA(s) || (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos);
2901
2902     if (s->ps.pps->tiles_enabled_flag)
2903         lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2904
2905     ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2906     if (ret < 0)
2907         goto fail;
2908
2909     ret = ff_hevc_frame_rps(s);
2910     if (ret < 0) {
2911         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2912         goto fail;
2913     }
2914
2915     s->ref->frame->key_frame = IS_IRAP(s);
2916
2917     ret = set_side_data(s);
2918     if (ret < 0)
2919         goto fail;
2920
2921     s->frame->pict_type = 3 - s->sh.slice_type;
2922
2923     if (!IS_IRAP(s))
2924         ff_hevc_bump_frame(s);
2925
2926     av_frame_unref(s->output_frame);
2927     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2928     if (ret < 0)
2929         goto fail;
2930
2931     if (!s->avctx->hwaccel)
2932         ff_thread_finish_setup(s->avctx);
2933
2934     return 0;
2935
2936 fail:
2937     if (s->ref)
2938         ff_hevc_unref_frame(s, s->ref, ~0);
2939     s->ref = NULL;
2940     return ret;
2941 }
2942
2943 static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
2944 {
2945     HEVCLocalContext *lc = s->HEVClc;
2946     GetBitContext *gb    = &lc->gb;
2947     int ctb_addr_ts, ret;
2948
2949     *gb              = nal->gb;
2950     s->nal_unit_type = nal->type;
2951     s->temporal_id   = nal->temporal_id;
2952
2953     switch (s->nal_unit_type) {
2954     case HEVC_NAL_VPS:
2955         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2956             ret = s->avctx->hwaccel->decode_params(s->avctx,
2957                                                    nal->type,
2958                                                    nal->raw_data,
2959                                                    nal->raw_size);
2960             if (ret < 0)
2961                 goto fail;
2962         }
2963         ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2964         if (ret < 0)
2965             goto fail;
2966         break;
2967     case HEVC_NAL_SPS:
2968         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2969             ret = s->avctx->hwaccel->decode_params(s->avctx,
2970                                                    nal->type,
2971                                                    nal->raw_data,
2972                                                    nal->raw_size);
2973             if (ret < 0)
2974                 goto fail;
2975         }
2976         ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2977                                      s->apply_defdispwin);
2978         if (ret < 0)
2979             goto fail;
2980         break;
2981     case HEVC_NAL_PPS:
2982         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2983             ret = s->avctx->hwaccel->decode_params(s->avctx,
2984                                                    nal->type,
2985                                                    nal->raw_data,
2986                                                    nal->raw_size);
2987             if (ret < 0)
2988                 goto fail;
2989         }
2990         ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2991         if (ret < 0)
2992             goto fail;
2993         break;
2994     case HEVC_NAL_SEI_PREFIX:
2995     case HEVC_NAL_SEI_SUFFIX:
2996         if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2997             ret = s->avctx->hwaccel->decode_params(s->avctx,
2998                                                    nal->type,
2999                                                    nal->raw_data,
3000                                                    nal->raw_size);
3001             if (ret < 0)
3002                 goto fail;
3003         }
3004         ret = ff_hevc_decode_nal_sei(gb, s->avctx, &s->sei, &s->ps, s->nal_unit_type);
3005         if (ret < 0)
3006             goto fail;
3007         break;
3008     case HEVC_NAL_TRAIL_R:
3009     case HEVC_NAL_TRAIL_N:
3010     case HEVC_NAL_TSA_N:
3011     case HEVC_NAL_TSA_R:
3012     case HEVC_NAL_STSA_N:
3013     case HEVC_NAL_STSA_R:
3014     case HEVC_NAL_BLA_W_LP:
3015     case HEVC_NAL_BLA_W_RADL:
3016     case HEVC_NAL_BLA_N_LP:
3017     case HEVC_NAL_IDR_W_RADL:
3018     case HEVC_NAL_IDR_N_LP:
3019     case HEVC_NAL_CRA_NUT:
3020     case HEVC_NAL_RADL_N:
3021     case HEVC_NAL_RADL_R:
3022     case HEVC_NAL_RASL_N:
3023     case HEVC_NAL_RASL_R:
3024         ret = hls_slice_header(s);
3025         if (ret < 0)
3026             return ret;
3027         if (ret == 1) {
3028             ret = AVERROR_INVALIDDATA;
3029             goto fail;
3030         }
3031
3032
3033         if (
3034             (s->avctx->skip_frame >= AVDISCARD_BIDIR && s->sh.slice_type == HEVC_SLICE_B) ||
3035             (s->avctx->skip_frame >= AVDISCARD_NONINTRA && s->sh.slice_type != HEVC_SLICE_I) ||
3036             (s->avctx->skip_frame >= AVDISCARD_NONKEY && !IS_IRAP(s))) {
3037             break;
3038         }
3039
3040         if (s->sh.first_slice_in_pic_flag) {
3041             if (s->max_ra == INT_MAX) {
3042                 if (s->nal_unit_type == HEVC_NAL_CRA_NUT || IS_BLA(s)) {
3043                     s->max_ra = s->poc;
3044                 } else {
3045                     if (IS_IDR(s))
3046                         s->max_ra = INT_MIN;
3047                 }
3048             }
3049
3050             if ((s->nal_unit_type == HEVC_NAL_RASL_R || s->nal_unit_type == HEVC_NAL_RASL_N) &&
3051                 s->poc <= s->max_ra) {
3052                 s->is_decoded = 0;
3053                 break;
3054             } else {
3055                 if (s->nal_unit_type == HEVC_NAL_RASL_R && s->poc > s->max_ra)
3056                     s->max_ra = INT_MIN;
3057             }
3058
3059             s->overlap ++;
3060             ret = hevc_frame_start(s);
3061             if (ret < 0)
3062                 return ret;
3063         } else if (!s->ref) {
3064             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
3065             goto fail;
3066         }
3067
3068         if (s->nal_unit_type != s->first_nal_type) {
3069             av_log(s->avctx, AV_LOG_ERROR,
3070                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
3071                    s->first_nal_type, s->nal_unit_type);
3072             return AVERROR_INVALIDDATA;
3073         }
3074
3075         if (!s->sh.dependent_slice_segment_flag &&
3076             s->sh.slice_type != HEVC_SLICE_I) {
3077             ret = ff_hevc_slice_rpl(s);
3078             if (ret < 0) {
3079                 av_log(s->avctx, AV_LOG_WARNING,
3080                        "Error constructing the reference lists for the current slice.\n");
3081                 goto fail;
3082             }
3083         }
3084
3085         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
3086             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
3087             if (ret < 0)
3088                 goto fail;
3089         }
3090
3091         if (s->avctx->hwaccel) {
3092             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
3093             if (ret < 0)
3094                 goto fail;
3095         } else {
3096             if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
3097                 ctb_addr_ts = hls_slice_data_wpp(s, nal);
3098             else
3099                 ctb_addr_ts = hls_slice_data(s);
3100             if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
3101                 s->is_decoded = 1;
3102             }
3103
3104             if (ctb_addr_ts < 0) {
3105                 ret = ctb_addr_ts;
3106                 goto fail;
3107             }
3108         }
3109         break;
3110     case HEVC_NAL_EOS_NUT:
3111     case HEVC_NAL_EOB_NUT:
3112         s->seq_decode = (s->seq_decode + 1) & 0xff;
3113         s->max_ra     = INT_MAX;
3114         break;
3115     case HEVC_NAL_AUD:
3116     case HEVC_NAL_FD_NUT:
3117         break;
3118     default:
3119         av_log(s->avctx, AV_LOG_INFO,
3120                "Skipping NAL unit %d\n", s->nal_unit_type);
3121     }
3122
3123     return 0;
3124 fail:
3125     if (s->avctx->err_recognition & AV_EF_EXPLODE)
3126         return ret;
3127     return 0;
3128 }
3129
3130 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
3131 {
3132     int i, ret = 0;
3133     int eos_at_start = 1;
3134
3135     s->ref = NULL;
3136     s->last_eos = s->eos;
3137     s->eos = 0;
3138     s->overlap = 0;
3139
3140     /* split the input packet into NAL units, so we know the upper bound on the
3141      * number of slices in the frame */
3142     ret = ff_h2645_packet_split(&s->pkt, buf, length, s->avctx, s->is_nalff,
3143                                 s->nal_length_size, s->avctx->codec_id, 1, 0);
3144     if (ret < 0) {
3145         av_log(s->avctx, AV_LOG_ERROR,
3146                "Error splitting the input into NAL units.\n");
3147         return ret;
3148     }
3149
3150     for (i = 0; i < s->pkt.nb_nals; i++) {
3151         if (s->pkt.nals[i].type == HEVC_NAL_EOB_NUT ||
3152             s->pkt.nals[i].type == HEVC_NAL_EOS_NUT) {
3153             if (eos_at_start) {
3154                 s->last_eos = 1;
3155             } else {
3156                 s->eos = 1;
3157             }
3158         } else {
3159             eos_at_start = 0;
3160         }
3161     }
3162
3163     /* decode the NAL units */
3164     for (i = 0; i < s->pkt.nb_nals; i++) {
3165         H2645NAL *nal = &s->pkt.nals[i];
3166
3167         if (s->avctx->skip_frame >= AVDISCARD_ALL ||
3168             (s->avctx->skip_frame >= AVDISCARD_NONREF
3169             && ff_hevc_nal_is_nonref(nal->type)) || nal->nuh_layer_id > 0)
3170             continue;
3171
3172         ret = decode_nal_unit(s, nal);
3173         if (ret >= 0 && s->overlap > 2)
3174             ret = AVERROR_INVALIDDATA;
3175         if (ret < 0) {
3176             av_log(s->avctx, AV_LOG_WARNING,
3177                    "Error parsing NAL unit #%d.\n", i);
3178             goto fail;
3179         }
3180     }
3181
3182 fail:
3183     if (s->ref && s->threads_type == FF_THREAD_FRAME)
3184         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
3185
3186     return ret;
3187 }
3188
3189 static void print_md5(void *log_ctx, int level, uint8_t md5[16])
3190 {
3191     int i;
3192     for (i = 0; i < 16; i++)
3193         av_log(log_ctx, level, "%02"PRIx8, md5[i]);
3194 }
3195
3196 static int verify_md5(HEVCContext *s, AVFrame *frame)
3197 {
3198     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
3199     int pixel_shift;
3200     int i, j;
3201
3202     if (!desc)
3203         return AVERROR(EINVAL);
3204
3205     pixel_shift = desc->comp[0].depth > 8;
3206
3207     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
3208            s->poc);
3209
3210     /* the checksums are LE, so we have to byteswap for >8bpp formats
3211      * on BE arches */
3212 #if HAVE_BIGENDIAN
3213     if (pixel_shift && !s->checksum_buf) {
3214         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
3215                        FFMAX3(frame->linesize[0], frame->linesize[1],
3216                               frame->linesize[2]));
3217         if (!s->checksum_buf)
3218             return AVERROR(ENOMEM);
3219     }
3220 #endif
3221
3222     for (i = 0; frame->data[i]; i++) {
3223         int width  = s->avctx->coded_width;
3224         int height = s->avctx->coded_height;
3225         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
3226         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
3227         uint8_t md5[16];
3228
3229         av_md5_init(s->md5_ctx);
3230         for (j = 0; j < h; j++) {
3231             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
3232 #if HAVE_BIGENDIAN
3233             if (pixel_shift) {
3234                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
3235                                     (const uint16_t *) src, w);
3236                 src = s->checksum_buf;
3237             }
3238 #endif
3239             av_md5_update(s->md5_ctx, src, w << pixel_shift);
3240         }
3241         av_md5_final(s->md5_ctx, md5);
3242
3243         if (!memcmp(md5, s->sei.picture_hash.md5[i], 16)) {
3244             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
3245             print_md5(s->avctx, AV_LOG_DEBUG, md5);
3246             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
3247         } else {
3248             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
3249             print_md5(s->avctx, AV_LOG_ERROR, md5);
3250             av_log   (s->avctx, AV_LOG_ERROR, " != ");
3251             print_md5(s->avctx, AV_LOG_ERROR, s->sei.picture_hash.md5[i]);
3252             av_log   (s->avctx, AV_LOG_ERROR, "\n");
3253             return AVERROR_INVALIDDATA;
3254         }
3255     }
3256
3257     av_log(s->avctx, AV_LOG_DEBUG, "\n");
3258
3259     return 0;
3260 }
3261
3262 static int hevc_decode_extradata(HEVCContext *s, uint8_t *buf, int length, int first)
3263 {
3264     int ret, i;
3265
3266     ret = ff_hevc_decode_extradata(buf, length, &s->ps, &s->sei, &s->is_nalff,
3267                                    &s->nal_length_size, s->avctx->err_recognition,
3268                                    s->apply_defdispwin, s->avctx);
3269     if (ret < 0)
3270         return ret;
3271
3272     /* export stream parameters from the first SPS */
3273     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3274         if (first && s->ps.sps_list[i]) {
3275             const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
3276             export_stream_params(s, sps);
3277             break;
3278         }
3279     }
3280
3281     /* export stream parameters from SEI */
3282     ret = export_stream_params_from_sei(s);
3283     if (ret < 0)
3284         return ret;
3285
3286     return 0;
3287 }
3288
3289 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
3290                              AVPacket *avpkt)
3291 {
3292     int ret;
3293     int new_extradata_size;
3294     uint8_t *new_extradata;
3295     HEVCContext *s = avctx->priv_data;
3296
3297     if (!avpkt->size) {
3298         ret = ff_hevc_output_frame(s, data, 1);
3299         if (ret < 0)
3300             return ret;
3301
3302         *got_output = ret;
3303         return 0;
3304     }
3305
3306     new_extradata = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA,
3307                                             &new_extradata_size);
3308     if (new_extradata && new_extradata_size > 0) {
3309         ret = hevc_decode_extradata(s, new_extradata, new_extradata_size, 0);
3310         if (ret < 0)
3311             return ret;
3312     }
3313
3314     s->ref = NULL;
3315     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
3316     if (ret < 0)
3317         return ret;
3318
3319     if (avctx->hwaccel) {
3320         if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) {
3321             av_log(avctx, AV_LOG_ERROR,
3322                    "hardware accelerator failed to decode picture\n");
3323             ff_hevc_unref_frame(s, s->ref, ~0);
3324             return ret;
3325         }
3326     } else {
3327         /* verify the SEI checksum */
3328         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
3329             s->sei.picture_hash.is_md5) {
3330             ret = verify_md5(s, s->ref->frame);
3331             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
3332                 ff_hevc_unref_frame(s, s->ref, ~0);
3333                 return ret;
3334             }
3335         }
3336     }
3337     s->sei.picture_hash.is_md5 = 0;
3338
3339     if (s->is_decoded) {
3340         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
3341         s->is_decoded = 0;
3342     }
3343
3344     if (s->output_frame->buf[0]) {
3345         av_frame_move_ref(data, s->output_frame);
3346         *got_output = 1;
3347     }
3348
3349     return avpkt->size;
3350 }
3351
3352 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
3353 {
3354     int ret;
3355
3356     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
3357     if (ret < 0)
3358         return ret;
3359
3360     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
3361     if (!dst->tab_mvf_buf)
3362         goto fail;
3363     dst->tab_mvf = src->tab_mvf;
3364
3365     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
3366     if (!dst->rpl_tab_buf)
3367         goto fail;
3368     dst->rpl_tab = src->rpl_tab;
3369
3370     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
3371     if (!dst->rpl_buf)
3372         goto fail;
3373
3374     dst->poc        = src->poc;
3375     dst->ctb_count  = src->ctb_count;
3376     dst->flags      = src->flags;
3377     dst->sequence   = src->sequence;
3378
3379     if (src->hwaccel_picture_private) {
3380         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
3381         if (!dst->hwaccel_priv_buf)
3382             goto fail;
3383         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
3384     }
3385
3386     return 0;
3387 fail:
3388     ff_hevc_unref_frame(s, dst, ~0);
3389     return AVERROR(ENOMEM);
3390 }
3391
3392 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3393 {
3394     HEVCContext       *s = avctx->priv_data;
3395     int i;
3396
3397     pic_arrays_free(s);
3398
3399     av_freep(&s->md5_ctx);
3400
3401     av_freep(&s->cabac_state);
3402
3403     for (i = 0; i < 3; i++) {
3404         av_freep(&s->sao_pixel_buffer_h[i]);
3405         av_freep(&s->sao_pixel_buffer_v[i]);
3406     }
3407     av_frame_free(&s->output_frame);
3408
3409     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3410         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3411         av_frame_free(&s->DPB[i].frame);
3412     }
3413
3414     ff_hevc_ps_uninit(&s->ps);
3415
3416     av_freep(&s->sh.entry_point_offset);
3417     av_freep(&s->sh.offset);
3418     av_freep(&s->sh.size);
3419
3420     for (i = 1; i < s->threads_number; i++) {
3421         HEVCLocalContext *lc = s->HEVClcList[i];
3422         if (lc) {
3423             av_freep(&s->HEVClcList[i]);
3424             av_freep(&s->sList[i]);
3425         }
3426     }
3427     if (s->HEVClc == s->HEVClcList[0])
3428         s->HEVClc = NULL;
3429     av_freep(&s->HEVClcList[0]);
3430     av_freep(&s->HEVClcList);
3431     av_freep(&s->sList);
3432
3433     ff_h2645_packet_uninit(&s->pkt);
3434
3435     ff_hevc_reset_sei(&s->sei);
3436
3437     return 0;
3438 }
3439
3440 static av_cold int hevc_init_context(AVCodecContext *avctx)
3441 {
3442     HEVCContext *s = avctx->priv_data;
3443     int i;
3444
3445     s->avctx = avctx;
3446
3447     s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3448     s->HEVClcList = av_mallocz(sizeof(HEVCLocalContext*) * s->threads_number);
3449     s->sList = av_mallocz(sizeof(HEVCContext*) * s->threads_number);
3450     if (!s->HEVClc || !s->HEVClcList || !s->sList)
3451         goto fail;
3452     s->HEVClcList[0] = s->HEVClc;
3453     s->sList[0] = s;
3454
3455     s->cabac_state = av_malloc(HEVC_CONTEXTS);
3456     if (!s->cabac_state)
3457         goto fail;
3458
3459     s->output_frame = av_frame_alloc();
3460     if (!s->output_frame)
3461         goto fail;
3462
3463     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3464         s->DPB[i].frame = av_frame_alloc();
3465         if (!s->DPB[i].frame)
3466             goto fail;
3467         s->DPB[i].tf.f = s->DPB[i].frame;
3468     }
3469
3470     s->max_ra = INT_MAX;
3471
3472     s->md5_ctx = av_md5_alloc();
3473     if (!s->md5_ctx)
3474         goto fail;
3475
3476     ff_bswapdsp_init(&s->bdsp);
3477
3478     s->context_initialized = 1;
3479     s->eos = 0;
3480
3481     ff_hevc_reset_sei(&s->sei);
3482
3483     return 0;
3484
3485 fail:
3486     hevc_decode_free(avctx);
3487     return AVERROR(ENOMEM);
3488 }
3489
3490 #if HAVE_THREADS
3491 static int hevc_update_thread_context(AVCodecContext *dst,
3492                                       const AVCodecContext *src)
3493 {
3494     HEVCContext *s  = dst->priv_data;
3495     HEVCContext *s0 = src->priv_data;
3496     int i, ret;
3497
3498     if (!s->context_initialized) {
3499         ret = hevc_init_context(dst);
3500         if (ret < 0)
3501             return ret;
3502     }
3503
3504     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3505         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3506         if (s0->DPB[i].frame->buf[0]) {
3507             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3508             if (ret < 0)
3509                 return ret;
3510         }
3511     }
3512
3513     if (s->ps.sps != s0->ps.sps)
3514         s->ps.sps = NULL;
3515     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
3516         ret = av_buffer_replace(&s->ps.vps_list[i], s0->ps.vps_list[i]);
3517         if (ret < 0)
3518             return ret;
3519     }
3520
3521     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3522         ret = av_buffer_replace(&s->ps.sps_list[i], s0->ps.sps_list[i]);
3523         if (ret < 0)
3524             return ret;
3525     }
3526
3527     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
3528         ret = av_buffer_replace(&s->ps.pps_list[i], s0->ps.pps_list[i]);
3529         if (ret < 0)
3530             return ret;
3531     }
3532
3533     if (s->ps.sps != s0->ps.sps)
3534         if ((ret = set_sps(s, s0->ps.sps, src->pix_fmt)) < 0)
3535             return ret;
3536
3537     s->seq_decode = s0->seq_decode;
3538     s->seq_output = s0->seq_output;
3539     s->pocTid0    = s0->pocTid0;
3540     s->max_ra     = s0->max_ra;
3541     s->eos        = s0->eos;
3542     s->no_rasl_output_flag = s0->no_rasl_output_flag;
3543
3544     s->is_nalff        = s0->is_nalff;
3545     s->nal_length_size = s0->nal_length_size;
3546
3547     s->threads_number      = s0->threads_number;
3548     s->threads_type        = s0->threads_type;
3549
3550     if (s0->eos) {
3551         s->seq_decode = (s->seq_decode + 1) & 0xff;
3552         s->max_ra = INT_MAX;
3553     }
3554
3555     ret = av_buffer_replace(&s->sei.a53_caption.buf_ref, s0->sei.a53_caption.buf_ref);
3556     if (ret < 0)
3557         return ret;
3558
3559     for (i = 0; i < s->sei.unregistered.nb_buf_ref; i++)
3560         av_buffer_unref(&s->sei.unregistered.buf_ref[i]);
3561     s->sei.unregistered.nb_buf_ref = 0;
3562
3563     if (s0->sei.unregistered.nb_buf_ref) {
3564         ret = av_reallocp_array(&s->sei.unregistered.buf_ref,
3565                                 s0->sei.unregistered.nb_buf_ref,
3566                                 sizeof(*s->sei.unregistered.buf_ref));
3567         if (ret < 0)
3568             return ret;
3569
3570         for (i = 0; i < s0->sei.unregistered.nb_buf_ref; i++) {
3571             s->sei.unregistered.buf_ref[i] = av_buffer_ref(s0->sei.unregistered.buf_ref[i]);
3572             if (!s->sei.unregistered.buf_ref[i])
3573                 return AVERROR(ENOMEM);
3574             s->sei.unregistered.nb_buf_ref++;
3575         }
3576     }
3577
3578     ret = av_buffer_replace(&s->sei.dynamic_hdr_plus.info, s0->sei.dynamic_hdr_plus.info);
3579     if (ret < 0)
3580         return ret;
3581
3582     s->sei.frame_packing        = s0->sei.frame_packing;
3583     s->sei.display_orientation  = s0->sei.display_orientation;
3584     s->sei.mastering_display    = s0->sei.mastering_display;
3585     s->sei.content_light        = s0->sei.content_light;
3586     s->sei.alternative_transfer = s0->sei.alternative_transfer;
3587
3588     ret = export_stream_params_from_sei(s);
3589     if (ret < 0)
3590         return ret;
3591
3592     return 0;
3593 }
3594 #endif
3595
3596 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3597 {
3598     HEVCContext *s = avctx->priv_data;
3599     int ret;
3600
3601     if(avctx->active_thread_type & FF_THREAD_SLICE)
3602         s->threads_number = avctx->thread_count;
3603     else
3604         s->threads_number = 1;
3605
3606     if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3607         s->threads_type = FF_THREAD_FRAME;
3608     else
3609         s->threads_type = FF_THREAD_SLICE;
3610
3611     ret = hevc_init_context(avctx);
3612     if (ret < 0)
3613         return ret;
3614
3615     s->enable_parallel_tiles = 0;
3616     s->sei.picture_timing.picture_struct = 0;
3617     s->eos = 1;
3618
3619     atomic_init(&s->wpp_err, 0);
3620
3621     if (!avctx->internal->is_copy) {
3622         if (avctx->extradata_size > 0 && avctx->extradata) {
3623             ret = hevc_decode_extradata(s, avctx->extradata, avctx->extradata_size, 1);
3624             if (ret < 0) {
3625                 hevc_decode_free(avctx);
3626                 return ret;
3627             }
3628         }
3629     }
3630
3631     return 0;
3632 }
3633
3634 static void hevc_decode_flush(AVCodecContext *avctx)
3635 {
3636     HEVCContext *s = avctx->priv_data;
3637     ff_hevc_flush_dpb(s);
3638     ff_hevc_reset_sei(&s->sei);
3639     s->max_ra = INT_MAX;
3640     s->eos = 1;
3641 }
3642
3643 #define OFFSET(x) offsetof(HEVCContext, x)
3644 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3645
3646 static const AVOption options[] = {
3647     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3648         AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3649     { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3650         AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3651     { NULL },
3652 };
3653
3654 static const AVClass hevc_decoder_class = {
3655     .class_name = "HEVC decoder",
3656     .item_name  = av_default_item_name,
3657     .option     = options,
3658     .version    = LIBAVUTIL_VERSION_INT,
3659 };
3660
3661 AVCodec ff_hevc_decoder = {
3662     .name                  = "hevc",
3663     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3664     .type                  = AVMEDIA_TYPE_VIDEO,
3665     .id                    = AV_CODEC_ID_HEVC,
3666     .priv_data_size        = sizeof(HEVCContext),
3667     .priv_class            = &hevc_decoder_class,
3668     .init                  = hevc_decode_init,
3669     .close                 = hevc_decode_free,
3670     .decode                = hevc_decode_frame,
3671     .flush                 = hevc_decode_flush,
3672     .update_thread_context = ONLY_IF_THREADS_ENABLED(hevc_update_thread_context),
3673     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
3674                              AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
3675     .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_EXPORTS_CROPPING |
3676                              FF_CODEC_CAP_ALLOCATE_PROGRESS,
3677     .profiles              = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
3678     .hw_configs            = (const AVCodecHWConfigInternal *const []) {
3679 #if CONFIG_HEVC_DXVA2_HWACCEL
3680                                HWACCEL_DXVA2(hevc),
3681 #endif
3682 #if CONFIG_HEVC_D3D11VA_HWACCEL
3683                                HWACCEL_D3D11VA(hevc),
3684 #endif
3685 #if CONFIG_HEVC_D3D11VA2_HWACCEL
3686                                HWACCEL_D3D11VA2(hevc),
3687 #endif
3688 #if CONFIG_HEVC_NVDEC_HWACCEL
3689                                HWACCEL_NVDEC(hevc),
3690 #endif
3691 #if CONFIG_HEVC_VAAPI_HWACCEL
3692                                HWACCEL_VAAPI(hevc),
3693 #endif
3694 #if CONFIG_HEVC_VDPAU_HWACCEL
3695                                HWACCEL_VDPAU(hevc),
3696 #endif
3697 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
3698                                HWACCEL_VIDEOTOOLBOX(hevc),
3699 #endif
3700                                NULL
3701                            },
3702 };