/*
 * Copyright (C) 2012 - 2013 Guillaume Martres
 * Copyright (C) 2012 - 2013 Mickael Raulet
 * Copyright (C) 2012 - 2013 Gildas Cocherel
 * Copyright (C) 2012 - 2013 Wassim Hamidouche
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
26 #include "libavutil/attributes.h"
27 #include "libavutil/common.h"
28 #include "libavutil/display.h"
29 #include "libavutil/internal.h"
30 #include "libavutil/mastering_display_metadata.h"
31 #include "libavutil/md5.h"
32 #include "libavutil/opt.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/stereo3d.h"
37 #include "bytestream.h"
38 #include "cabac_functions.h"
41 #include "hevc_data.h"
42 #include "hevc_parse.h"
/* Maps an inter-prediction block width (in luma samples) to the index used
 * to select the weighted-prediction DSP function; only the listed widths
 * are valid inputs, all other entries are zero-initialized. */
const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
/*
 * NOTE: Each function hls_foo corresponds to the function foo in the
 * specification (HLS stands for High Level Syntax).
 */
58 /* free everything allocated by pic_arrays_init() */
59 static void pic_arrays_free(HEVCContext *s)
62 av_freep(&s->deblock);
64 av_freep(&s->skip_flag);
65 av_freep(&s->tab_ct_depth);
67 av_freep(&s->tab_ipm);
68 av_freep(&s->cbf_luma);
71 av_freep(&s->qp_y_tab);
72 av_freep(&s->tab_slice_address);
73 av_freep(&s->filter_slice_edges);
75 av_freep(&s->horizontal_bs);
76 av_freep(&s->vertical_bs);
78 av_freep(&s->sh.entry_point_offset);
79 av_freep(&s->sh.size);
80 av_freep(&s->sh.offset);
82 av_buffer_pool_uninit(&s->tab_mvf_pool);
83 av_buffer_pool_uninit(&s->rpl_tab_pool);
86 /* allocate arrays that depend on frame dimensions */
87 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
89 int log2_min_cb_size = sps->log2_min_cb_size;
90 int width = sps->width;
91 int height = sps->height;
92 int pic_size_in_ctb = ((width >> log2_min_cb_size) + 1) *
93 ((height >> log2_min_cb_size) + 1);
94 int ctb_count = sps->ctb_width * sps->ctb_height;
95 int min_pu_size = sps->min_pu_width * sps->min_pu_height;
97 s->bs_width = (width >> 2) + 1;
98 s->bs_height = (height >> 2) + 1;
100 s->sao = av_mallocz_array(ctb_count, sizeof(*s->sao));
101 s->deblock = av_mallocz_array(ctb_count, sizeof(*s->deblock));
102 if (!s->sao || !s->deblock)
105 s->skip_flag = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
106 s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
107 if (!s->skip_flag || !s->tab_ct_depth)
110 s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
111 s->tab_ipm = av_mallocz(min_pu_size);
112 s->is_pcm = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
113 if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
116 s->filter_slice_edges = av_mallocz(ctb_count);
117 s->tab_slice_address = av_malloc_array(pic_size_in_ctb,
118 sizeof(*s->tab_slice_address));
119 s->qp_y_tab = av_malloc_array(pic_size_in_ctb,
120 sizeof(*s->qp_y_tab));
121 if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
124 s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
125 s->vertical_bs = av_mallocz_array(s->bs_width, s->bs_height);
126 if (!s->horizontal_bs || !s->vertical_bs)
129 s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
131 s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
133 if (!s->tab_mvf_pool || !s->rpl_tab_pool)
140 return AVERROR(ENOMEM);
143 static int pred_weight_table(HEVCContext *s, GetBitContext *gb)
147 uint8_t luma_weight_l0_flag[16];
148 uint8_t chroma_weight_l0_flag[16];
149 uint8_t luma_weight_l1_flag[16];
150 uint8_t chroma_weight_l1_flag[16];
151 int luma_log2_weight_denom;
153 luma_log2_weight_denom = get_ue_golomb_long(gb);
154 if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7) {
155 av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
156 return AVERROR_INVALIDDATA;
158 s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
159 if (s->ps.sps->chroma_format_idc != 0) {
160 int64_t chroma_log2_weight_denom = luma_log2_weight_denom + (int64_t)get_se_golomb(gb);
161 if (chroma_log2_weight_denom < 0 || chroma_log2_weight_denom > 7) {
162 av_log(s->avctx, AV_LOG_ERROR, "chroma_log2_weight_denom %"PRId64" is invalid\n", chroma_log2_weight_denom);
163 return AVERROR_INVALIDDATA;
165 s->sh.chroma_log2_weight_denom = chroma_log2_weight_denom;
168 for (i = 0; i < s->sh.nb_refs[L0]; i++) {
169 luma_weight_l0_flag[i] = get_bits1(gb);
170 if (!luma_weight_l0_flag[i]) {
171 s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
172 s->sh.luma_offset_l0[i] = 0;
175 if (s->ps.sps->chroma_format_idc != 0) {
176 for (i = 0; i < s->sh.nb_refs[L0]; i++)
177 chroma_weight_l0_flag[i] = get_bits1(gb);
179 for (i = 0; i < s->sh.nb_refs[L0]; i++)
180 chroma_weight_l0_flag[i] = 0;
182 for (i = 0; i < s->sh.nb_refs[L0]; i++) {
183 if (luma_weight_l0_flag[i]) {
184 int delta_luma_weight_l0 = get_se_golomb(gb);
185 s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
186 s->sh.luma_offset_l0[i] = get_se_golomb(gb);
188 if (chroma_weight_l0_flag[i]) {
189 for (j = 0; j < 2; j++) {
190 int delta_chroma_weight_l0 = get_se_golomb(gb);
191 int delta_chroma_offset_l0 = get_se_golomb(gb);
193 if ( (int8_t)delta_chroma_weight_l0 != delta_chroma_weight_l0
194 || delta_chroma_offset_l0 < -(1<<17) || delta_chroma_offset_l0 > (1<<17)) {
195 return AVERROR_INVALIDDATA;
198 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
199 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
200 >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
203 s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
204 s->sh.chroma_offset_l0[i][0] = 0;
205 s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
206 s->sh.chroma_offset_l0[i][1] = 0;
209 if (s->sh.slice_type == HEVC_SLICE_B) {
210 for (i = 0; i < s->sh.nb_refs[L1]; i++) {
211 luma_weight_l1_flag[i] = get_bits1(gb);
212 if (!luma_weight_l1_flag[i]) {
213 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
214 s->sh.luma_offset_l1[i] = 0;
217 if (s->ps.sps->chroma_format_idc != 0) {
218 for (i = 0; i < s->sh.nb_refs[L1]; i++)
219 chroma_weight_l1_flag[i] = get_bits1(gb);
221 for (i = 0; i < s->sh.nb_refs[L1]; i++)
222 chroma_weight_l1_flag[i] = 0;
224 for (i = 0; i < s->sh.nb_refs[L1]; i++) {
225 if (luma_weight_l1_flag[i]) {
226 int delta_luma_weight_l1 = get_se_golomb(gb);
227 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
228 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
230 if (chroma_weight_l1_flag[i]) {
231 for (j = 0; j < 2; j++) {
232 int delta_chroma_weight_l1 = get_se_golomb(gb);
233 int delta_chroma_offset_l1 = get_se_golomb(gb);
235 if ( (int8_t)delta_chroma_weight_l1 != delta_chroma_weight_l1
236 || delta_chroma_offset_l1 < -(1<<17) || delta_chroma_offset_l1 > (1<<17)) {
237 return AVERROR_INVALIDDATA;
240 s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
241 s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
242 >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
245 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
246 s->sh.chroma_offset_l1[i][0] = 0;
247 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
248 s->sh.chroma_offset_l1[i][1] = 0;
255 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
257 const HEVCSPS *sps = s->ps.sps;
258 int max_poc_lsb = 1 << sps->log2_max_poc_lsb;
259 int prev_delta_msb = 0;
260 unsigned int nb_sps = 0, nb_sh;
264 if (!sps->long_term_ref_pics_present_flag)
267 if (sps->num_long_term_ref_pics_sps > 0)
268 nb_sps = get_ue_golomb_long(gb);
269 nb_sh = get_ue_golomb_long(gb);
271 if (nb_sps > sps->num_long_term_ref_pics_sps)
272 return AVERROR_INVALIDDATA;
273 if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
274 return AVERROR_INVALIDDATA;
276 rps->nb_refs = nb_sh + nb_sps;
278 for (i = 0; i < rps->nb_refs; i++) {
279 uint8_t delta_poc_msb_present;
282 uint8_t lt_idx_sps = 0;
284 if (sps->num_long_term_ref_pics_sps > 1)
285 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
287 rps->poc[i] = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
288 rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
290 rps->poc[i] = get_bits(gb, sps->log2_max_poc_lsb);
291 rps->used[i] = get_bits1(gb);
294 delta_poc_msb_present = get_bits1(gb);
295 if (delta_poc_msb_present) {
296 int64_t delta = get_ue_golomb_long(gb);
299 if (i && i != nb_sps)
300 delta += prev_delta_msb;
302 poc = rps->poc[i] + s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
303 if (poc != (int32_t)poc)
304 return AVERROR_INVALIDDATA;
306 prev_delta_msb = delta;
313 static void export_stream_params(AVCodecContext *avctx, const HEVCParamSets *ps,
316 const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
317 const HEVCWindow *ow = &sps->output_window;
318 unsigned int num = 0, den = 0;
320 avctx->pix_fmt = sps->pix_fmt;
321 avctx->coded_width = sps->width;
322 avctx->coded_height = sps->height;
323 avctx->width = sps->width - ow->left_offset - ow->right_offset;
324 avctx->height = sps->height - ow->top_offset - ow->bottom_offset;
325 avctx->has_b_frames = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
326 avctx->profile = sps->ptl.general_ptl.profile_idc;
327 avctx->level = sps->ptl.general_ptl.level_idc;
329 ff_set_sar(avctx, sps->vui.sar);
331 if (sps->vui.video_signal_type_present_flag)
332 avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
335 avctx->color_range = AVCOL_RANGE_MPEG;
337 if (sps->vui.colour_description_present_flag) {
338 avctx->color_primaries = sps->vui.colour_primaries;
339 avctx->color_trc = sps->vui.transfer_characteristic;
340 avctx->colorspace = sps->vui.matrix_coeffs;
342 avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
343 avctx->color_trc = AVCOL_TRC_UNSPECIFIED;
344 avctx->colorspace = AVCOL_SPC_UNSPECIFIED;
347 if (vps->vps_timing_info_present_flag) {
348 num = vps->vps_num_units_in_tick;
349 den = vps->vps_time_scale;
350 } else if (sps->vui.vui_timing_info_present_flag) {
351 num = sps->vui.vui_num_units_in_tick;
352 den = sps->vui.vui_time_scale;
355 if (num != 0 && den != 0)
356 av_reduce(&avctx->framerate.den, &avctx->framerate.num,
360 static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
362 #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + \
363 CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \
364 CONFIG_HEVC_NVDEC_HWACCEL + \
365 CONFIG_HEVC_VAAPI_HWACCEL + \
366 CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \
367 CONFIG_HEVC_VDPAU_HWACCEL)
368 enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
370 switch (sps->pix_fmt) {
371 case AV_PIX_FMT_YUV420P:
372 case AV_PIX_FMT_YUVJ420P:
373 #if CONFIG_HEVC_DXVA2_HWACCEL
374 *fmt++ = AV_PIX_FMT_DXVA2_VLD;
376 #if CONFIG_HEVC_D3D11VA_HWACCEL
377 *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
378 *fmt++ = AV_PIX_FMT_D3D11;
380 #if CONFIG_HEVC_VAAPI_HWACCEL
381 *fmt++ = AV_PIX_FMT_VAAPI;
383 #if CONFIG_HEVC_VDPAU_HWACCEL
384 *fmt++ = AV_PIX_FMT_VDPAU;
386 #if CONFIG_HEVC_NVDEC_HWACCEL
387 *fmt++ = AV_PIX_FMT_CUDA;
389 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
390 *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
393 case AV_PIX_FMT_YUV420P10:
394 #if CONFIG_HEVC_DXVA2_HWACCEL
395 *fmt++ = AV_PIX_FMT_DXVA2_VLD;
397 #if CONFIG_HEVC_D3D11VA_HWACCEL
398 *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
399 *fmt++ = AV_PIX_FMT_D3D11;
401 #if CONFIG_HEVC_VAAPI_HWACCEL
402 *fmt++ = AV_PIX_FMT_VAAPI;
404 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
405 *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
407 #if CONFIG_HEVC_NVDEC_HWACCEL
408 *fmt++ = AV_PIX_FMT_CUDA;
411 case AV_PIX_FMT_YUV420P12:
412 case AV_PIX_FMT_YUV444P:
413 case AV_PIX_FMT_YUV444P10:
414 case AV_PIX_FMT_YUV444P12:
415 #if CONFIG_HEVC_NVDEC_HWACCEL
416 *fmt++ = AV_PIX_FMT_CUDA;
421 *fmt++ = sps->pix_fmt;
422 *fmt = AV_PIX_FMT_NONE;
424 return ff_thread_get_format(s->avctx, pix_fmts);
427 static int set_sps(HEVCContext *s, const HEVCSPS *sps,
428 enum AVPixelFormat pix_fmt)
439 ret = pic_arrays_init(s, sps);
443 export_stream_params(s->avctx, &s->ps, sps);
445 s->avctx->pix_fmt = pix_fmt;
447 ff_hevc_pred_init(&s->hpc, sps->bit_depth);
448 ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
449 ff_videodsp_init (&s->vdsp, sps->bit_depth);
451 for (i = 0; i < 3; i++) {
452 av_freep(&s->sao_pixel_buffer_h[i]);
453 av_freep(&s->sao_pixel_buffer_v[i]);
456 if (sps->sao_enabled && !s->avctx->hwaccel) {
457 int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
460 for(c_idx = 0; c_idx < c_count; c_idx++) {
461 int w = sps->width >> sps->hshift[c_idx];
462 int h = sps->height >> sps->vshift[c_idx];
463 s->sao_pixel_buffer_h[c_idx] =
464 av_malloc((w * 2 * sps->ctb_height) <<
466 s->sao_pixel_buffer_v[c_idx] =
467 av_malloc((h * 2 * sps->ctb_width) <<
473 s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
/**
 * Parse slice_segment_header() (spec section 7.3.6.1) into s->sh,
 * activating the referenced PPS (and SPS, on change) as a side effect.
 *
 * NOTE(review): this dump has dropped many physical lines of the original
 * (braces, else branches, declarations, returns) and each line carries a
 * stray line-number prefix — kept byte-identical below; restore against
 * upstream FFmpeg before compiling.
 */
483 static int hls_slice_header(HEVCContext *s)
485 GetBitContext *gb = &s->HEVClc->gb;
486 SliceHeader *sh = &s->sh;
490 sh->first_slice_in_pic_flag = get_bits1(gb);
491 if (s->ref && sh->first_slice_in_pic_flag) {
492 av_log(s->avctx, AV_LOG_ERROR, "Two slices reporting being the first in the same frame.\n");
493 return 1; // This slice will be skiped later, do not corrupt state
/* An IDR/BLA picture starts a new coded video sequence: bump the sequence
 * counter, flush the DPB references and read no_output_of_prior_pics. */
496 if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
497 s->seq_decode = (s->seq_decode + 1) & 0xff;
500 ff_hevc_clear_refs(s);
502 sh->no_output_of_prior_pics_flag = 0;
504 sh->no_output_of_prior_pics_flag = get_bits1(gb);
/* PPS activation; the PPS must not change between slices of one picture */
506 sh->pps_id = get_ue_golomb_long(gb);
507 if (sh->pps_id >= HEVC_MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
508 av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
509 return AVERROR_INVALIDDATA;
511 if (!sh->first_slice_in_pic_flag &&
512 s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
513 av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
514 return AVERROR_INVALIDDATA;
516 s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
517 if (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos == 1)
518 sh->no_output_of_prior_pics_flag = 1;
/* SPS change: re-run format negotiation and reinit SPS-dependent state */
520 if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
521 const HEVCSPS *sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
522 const HEVCSPS *last_sps = s->ps.sps;
523 enum AVPixelFormat pix_fmt;
525 if (last_sps && IS_IRAP(s) && s->nal_unit_type != HEVC_NAL_CRA_NUT) {
526 if (sps->width != last_sps->width || sps->height != last_sps->height ||
527 sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering !=
528 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
529 sh->no_output_of_prior_pics_flag = 0;
531 ff_hevc_clear_refs(s);
533 ret = set_sps(s, sps, sps->pix_fmt);
537 pix_fmt = get_format(s, sps);
540 s->avctx->pix_fmt = pix_fmt;
542 s->seq_decode = (s->seq_decode + 1) & 0xff;
/* slice segment address (only coded for non-first slice segments) */
546 sh->dependent_slice_segment_flag = 0;
547 if (!sh->first_slice_in_pic_flag) {
548 int slice_address_length;
550 if (s->ps.pps->dependent_slice_segments_enabled_flag)
551 sh->dependent_slice_segment_flag = get_bits1(gb);
553 slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
554 s->ps.sps->ctb_height);
555 sh->slice_segment_addr = get_bitsz(gb, slice_address_length);
556 if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
557 av_log(s->avctx, AV_LOG_ERROR,
558 "Invalid slice segment address: %u.\n",
559 sh->slice_segment_addr);
560 return AVERROR_INVALIDDATA;
563 if (!sh->dependent_slice_segment_flag) {
564 sh->slice_addr = sh->slice_segment_addr;
568 sh->slice_segment_addr = sh->slice_addr = 0;
570 s->slice_initialized = 0;
/* independent slice segment: parse the full slice header */
573 if (!sh->dependent_slice_segment_flag) {
574 s->slice_initialized = 0;
576 for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
577 skip_bits(gb, 1); // slice_reserved_undetermined_flag[]
579 sh->slice_type = get_ue_golomb_long(gb);
580 if (!(sh->slice_type == HEVC_SLICE_I ||
581 sh->slice_type == HEVC_SLICE_P ||
582 sh->slice_type == HEVC_SLICE_B)) {
583 av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
585 return AVERROR_INVALIDDATA;
587 if (IS_IRAP(s) && sh->slice_type != HEVC_SLICE_I) {
588 av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
589 return AVERROR_INVALIDDATA;
592 // when flag is not present, picture is inferred to be output
593 sh->pic_output_flag = 1;
594 if (s->ps.pps->output_flag_present_flag)
595 sh->pic_output_flag = get_bits1(gb);
597 if (s->ps.sps->separate_colour_plane_flag)
598 sh->colour_plane_id = get_bits(gb, 2);
/* picture order count and reference picture sets
 * NOTE(review): dump dropped lines here (IDR handling around POC) */
603 sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
604 poc = ff_hevc_compute_poc(s->ps.sps, s->pocTid0, sh->pic_order_cnt_lsb, s->nal_unit_type);
605 if (!sh->first_slice_in_pic_flag && poc != s->poc) {
606 av_log(s->avctx, AV_LOG_WARNING,
607 "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
608 if (s->avctx->err_recognition & AV_EF_EXPLODE)
609 return AVERROR_INVALIDDATA;
614 sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
615 pos = get_bits_left(gb);
616 if (!sh->short_term_ref_pic_set_sps_flag) {
617 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
621 sh->short_term_rps = &sh->slice_rps;
623 int numbits, rps_idx;
625 if (!s->ps.sps->nb_st_rps) {
626 av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
627 return AVERROR_INVALIDDATA;
630 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
631 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
632 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
634 sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
636 pos = get_bits_left(gb);
637 ret = decode_lt_rps(s, &sh->long_term_rps, gb);
639 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
640 if (s->avctx->err_recognition & AV_EF_EXPLODE)
641 return AVERROR_INVALIDDATA;
643 sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
645 if (s->ps.sps->sps_temporal_mvp_enabled_flag)
646 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
648 sh->slice_temporal_mvp_enabled_flag = 0;
650 s->sh.short_term_rps = NULL;
/* pocTid0 tracking: only sub-layer-zero, non-sub-layer-non-reference and
 * non-RASL/RADL pictures update the TMVP POC anchor */
655 if (sh->first_slice_in_pic_flag && s->temporal_id == 0 &&
656 s->nal_unit_type != HEVC_NAL_TRAIL_N &&
657 s->nal_unit_type != HEVC_NAL_TSA_N &&
658 s->nal_unit_type != HEVC_NAL_STSA_N &&
659 s->nal_unit_type != HEVC_NAL_RADL_N &&
660 s->nal_unit_type != HEVC_NAL_RADL_R &&
661 s->nal_unit_type != HEVC_NAL_RASL_N &&
662 s->nal_unit_type != HEVC_NAL_RASL_R)
665 if (s->ps.sps->sao_enabled) {
666 sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
667 if (s->ps.sps->chroma_format_idc) {
668 sh->slice_sample_adaptive_offset_flag[1] =
669 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
672 sh->slice_sample_adaptive_offset_flag[0] = 0;
673 sh->slice_sample_adaptive_offset_flag[1] = 0;
674 sh->slice_sample_adaptive_offset_flag[2] = 0;
/* reference list sizes and optional reference list modification */
677 sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
678 if (sh->slice_type == HEVC_SLICE_P || sh->slice_type == HEVC_SLICE_B) {
681 sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
682 if (sh->slice_type == HEVC_SLICE_B)
683 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
685 if (get_bits1(gb)) { // num_ref_idx_active_override_flag
686 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
687 if (sh->slice_type == HEVC_SLICE_B)
688 sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
690 if (sh->nb_refs[L0] > HEVC_MAX_REFS || sh->nb_refs[L1] > HEVC_MAX_REFS) {
691 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
692 sh->nb_refs[L0], sh->nb_refs[L1]);
693 return AVERROR_INVALIDDATA;
696 sh->rpl_modification_flag[0] = 0;
697 sh->rpl_modification_flag[1] = 0;
698 nb_refs = ff_hevc_frame_nb_refs(s);
700 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
701 return AVERROR_INVALIDDATA;
704 if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
705 sh->rpl_modification_flag[0] = get_bits1(gb);
706 if (sh->rpl_modification_flag[0]) {
707 for (i = 0; i < sh->nb_refs[L0]; i++)
708 sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
711 if (sh->slice_type == HEVC_SLICE_B) {
712 sh->rpl_modification_flag[1] = get_bits1(gb);
713 if (sh->rpl_modification_flag[1] == 1)
714 for (i = 0; i < sh->nb_refs[L1]; i++)
715 sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
719 if (sh->slice_type == HEVC_SLICE_B)
720 sh->mvd_l1_zero_flag = get_bits1(gb);
722 if (s->ps.pps->cabac_init_present_flag)
723 sh->cabac_init_flag = get_bits1(gb);
725 sh->cabac_init_flag = 0;
/* collocated picture selection for temporal MVP */
727 sh->collocated_ref_idx = 0;
728 if (sh->slice_temporal_mvp_enabled_flag) {
729 sh->collocated_list = L0;
730 if (sh->slice_type == HEVC_SLICE_B)
731 sh->collocated_list = !get_bits1(gb);
733 if (sh->nb_refs[sh->collocated_list] > 1) {
734 sh->collocated_ref_idx = get_ue_golomb_long(gb);
735 if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
736 av_log(s->avctx, AV_LOG_ERROR,
737 "Invalid collocated_ref_idx: %d.\n",
738 sh->collocated_ref_idx);
739 return AVERROR_INVALIDDATA;
744 if ((s->ps.pps->weighted_pred_flag && sh->slice_type == HEVC_SLICE_P) ||
745 (s->ps.pps->weighted_bipred_flag && sh->slice_type == HEVC_SLICE_B)) {
746 int ret = pred_weight_table(s, gb);
751 sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
752 if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
753 av_log(s->avctx, AV_LOG_ERROR,
754 "Invalid number of merging MVP candidates: %d.\n",
755 sh->max_num_merge_cand);
756 return AVERROR_INVALIDDATA;
/* QP deltas and loop-filter controls */
760 sh->slice_qp_delta = get_se_golomb(gb);
762 if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
763 sh->slice_cb_qp_offset = get_se_golomb(gb);
764 sh->slice_cr_qp_offset = get_se_golomb(gb);
766 sh->slice_cb_qp_offset = 0;
767 sh->slice_cr_qp_offset = 0;
770 if (s->ps.pps->chroma_qp_offset_list_enabled_flag)
771 sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
773 sh->cu_chroma_qp_offset_enabled_flag = 0;
775 if (s->ps.pps->deblocking_filter_control_present_flag) {
776 int deblocking_filter_override_flag = 0;
778 if (s->ps.pps->deblocking_filter_override_enabled_flag)
779 deblocking_filter_override_flag = get_bits1(gb);
781 if (deblocking_filter_override_flag) {
782 sh->disable_deblocking_filter_flag = get_bits1(gb);
783 if (!sh->disable_deblocking_filter_flag) {
784 int beta_offset_div2 = get_se_golomb(gb);
785 int tc_offset_div2 = get_se_golomb(gb) ;
786 if (beta_offset_div2 < -6 || beta_offset_div2 > 6 ||
787 tc_offset_div2 < -6 || tc_offset_div2 > 6) {
788 av_log(s->avctx, AV_LOG_ERROR,
789 "Invalid deblock filter offsets: %d, %d\n",
790 beta_offset_div2, tc_offset_div2);
791 return AVERROR_INVALIDDATA;
793 sh->beta_offset = beta_offset_div2 * 2;
794 sh->tc_offset = tc_offset_div2 * 2;
797 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
798 sh->beta_offset = s->ps.pps->beta_offset;
799 sh->tc_offset = s->ps.pps->tc_offset;
802 sh->disable_deblocking_filter_flag = 0;
807 if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
808 (sh->slice_sample_adaptive_offset_flag[0] ||
809 sh->slice_sample_adaptive_offset_flag[1] ||
810 !sh->disable_deblocking_filter_flag)) {
811 sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
813 sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
815 } else if (!s->slice_initialized) {
816 av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
817 return AVERROR_INVALIDDATA;
/* entry points for tiles / wavefront parallel processing */
820 sh->num_entry_point_offsets = 0;
821 if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
822 unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
823 // It would be possible to bound this tighter but this here is simpler
824 if (num_entry_point_offsets > get_bits_left(gb)) {
825 av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
826 return AVERROR_INVALIDDATA;
829 sh->num_entry_point_offsets = num_entry_point_offsets;
830 if (sh->num_entry_point_offsets > 0) {
831 int offset_len = get_ue_golomb_long(gb) + 1;
833 if (offset_len < 1 || offset_len > 32) {
834 sh->num_entry_point_offsets = 0;
835 av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
836 return AVERROR_INVALIDDATA;
839 av_freep(&sh->entry_point_offset);
840 av_freep(&sh->offset);
842 sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(unsigned));
843 sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
844 sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
845 if (!sh->entry_point_offset || !sh->offset || !sh->size) {
846 sh->num_entry_point_offsets = 0;
847 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
848 return AVERROR(ENOMEM);
850 for (i = 0; i < sh->num_entry_point_offsets; i++) {
851 unsigned val = get_bits_long(gb, offset_len);
852 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
854 if (s->threads_number > 1 && (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1)) {
855 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
856 s->threads_number = 1;
858 s->enable_parallel_tiles = 0;
860 s->enable_parallel_tiles = 0;
863 if (s->ps.pps->slice_header_extension_present_flag) {
864 unsigned int length = get_ue_golomb_long(gb);
865 if (length*8LL > get_bits_left(gb)) {
866 av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
867 return AVERROR_INVALIDDATA;
869 for (i = 0; i < length; i++)
870 skip_bits(gb, 8); // slice_header_extension_data_byte
873 // Inferred parameters
874 sh->slice_qp = 26U + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
875 if (sh->slice_qp > 51 ||
876 sh->slice_qp < -s->ps.sps->qp_bd_offset) {
877 av_log(s->avctx, AV_LOG_ERROR,
878 "The slice_qp %d is outside the valid range "
881 -s->ps.sps->qp_bd_offset);
882 return AVERROR_INVALIDDATA;
885 sh->slice_ctb_addr_rs = sh->slice_segment_addr;
887 if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
888 av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
889 return AVERROR_INVALIDDATA;
892 if (get_bits_left(gb) < 0) {
893 av_log(s->avctx, AV_LOG_ERROR,
894 "Overread slice header by %d bits\n", -get_bits_left(gb));
895 return AVERROR_INVALIDDATA;
898 s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
900 if (!s->ps.pps->cu_qp_delta_enabled_flag)
901 s->HEVClc->qp_y = s->sh.slice_qp;
903 s->slice_initialized = 1;
904 s->HEVClc->tu.cu_qp_offset_cb = 0;
905 s->HEVClc->tu.cu_qp_offset_cr = 0;
/* Index the per-CTB table `tab` at CTB raster position (x, y). */
#define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])

/* Assign one SAO parameter: decode it fresh, or inherit it from the left or
 * above CTB when the corresponding sao_merge_* flag is set. */
#define SET_SAO(elem, value)                            \
do {                                                    \
    if (!sao_merge_up_flag && !sao_merge_left_flag)     \
        sao->elem = value;                              \
    else if (sao_merge_left_flag)                       \
        sao->elem = CTB(s->sao, rx-1, ry).elem;         \
    else if (sao_merge_up_flag)                         \
        sao->elem = CTB(s->sao, rx, ry-1).elem;         \
    else                                                \
        sao->elem = 0;                                  \
} while (0)
924 static void hls_sao_param(HEVCContext *s, int rx, int ry)
926 HEVCLocalContext *lc = s->HEVClc;
927 int sao_merge_left_flag = 0;
928 int sao_merge_up_flag = 0;
929 SAOParams *sao = &CTB(s->sao, rx, ry);
932 if (s->sh.slice_sample_adaptive_offset_flag[0] ||
933 s->sh.slice_sample_adaptive_offset_flag[1]) {
935 if (lc->ctb_left_flag)
936 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
938 if (ry > 0 && !sao_merge_left_flag) {
940 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
944 for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
945 int log2_sao_offset_scale = c_idx == 0 ? s->ps.pps->log2_sao_offset_scale_luma :
946 s->ps.pps->log2_sao_offset_scale_chroma;
948 if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
949 sao->type_idx[c_idx] = SAO_NOT_APPLIED;
954 sao->type_idx[2] = sao->type_idx[1];
955 sao->eo_class[2] = sao->eo_class[1];
957 SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
960 if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
963 for (i = 0; i < 4; i++)
964 SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
966 if (sao->type_idx[c_idx] == SAO_BAND) {
967 for (i = 0; i < 4; i++) {
968 if (sao->offset_abs[c_idx][i]) {
969 SET_SAO(offset_sign[c_idx][i],
970 ff_hevc_sao_offset_sign_decode(s));
972 sao->offset_sign[c_idx][i] = 0;
975 SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
976 } else if (c_idx != 2) {
977 SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
980 // Inferred parameters
981 sao->offset_val[c_idx][0] = 0;
982 for (i = 0; i < 4; i++) {
983 sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
984 if (sao->type_idx[c_idx] == SAO_EDGE) {
986 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
987 } else if (sao->offset_sign[c_idx][i]) {
988 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
990 sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale;
998 static int hls_cross_component_pred(HEVCContext *s, int idx) {
999 HEVCLocalContext *lc = s->HEVClc;
1000 int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
1002 if (log2_res_scale_abs_plus1 != 0) {
1003 int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
1004 lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
1005 (1 - 2 * res_scale_sign_flag);
1007 lc->tu.res_scale_val = 0;
1014 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
1015 int xBase, int yBase, int cb_xBase, int cb_yBase,
1016 int log2_cb_size, int log2_trafo_size,
1017 int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
1019 HEVCLocalContext *lc = s->HEVClc;
1020 const int log2_trafo_size_c = log2_trafo_size - s->ps.sps->hshift[1];
1023 if (lc->cu.pred_mode == MODE_INTRA) {
1024 int trafo_size = 1 << log2_trafo_size;
1025 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
1027 s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
1030 if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
1031 (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1032 int scan_idx = SCAN_DIAG;
1033 int scan_idx_c = SCAN_DIAG;
1034 int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
1035 (s->ps.sps->chroma_format_idc == 2 &&
1036 (cbf_cb[1] || cbf_cr[1]));
1038 if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
1039 lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
1040 if (lc->tu.cu_qp_delta != 0)
1041 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
1042 lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
1043 lc->tu.is_cu_qp_delta_coded = 1;
1045 if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
1046 lc->tu.cu_qp_delta > (25 + s->ps.sps->qp_bd_offset / 2)) {
1047 av_log(s->avctx, AV_LOG_ERROR,
1048 "The cu_qp_delta %d is outside the valid range "
1051 -(26 + s->ps.sps->qp_bd_offset / 2),
1052 (25 + s->ps.sps->qp_bd_offset / 2));
1053 return AVERROR_INVALIDDATA;
1056 ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
1059 if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
1060 !lc->cu.cu_transquant_bypass_flag && !lc->tu.is_cu_chroma_qp_offset_coded) {
1061 int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
1062 if (cu_chroma_qp_offset_flag) {
1063 int cu_chroma_qp_offset_idx = 0;
1064 if (s->ps.pps->chroma_qp_offset_list_len_minus1 > 0) {
1065 cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
1066 av_log(s->avctx, AV_LOG_ERROR,
1067 "cu_chroma_qp_offset_idx not yet tested.\n");
1069 lc->tu.cu_qp_offset_cb = s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
1070 lc->tu.cu_qp_offset_cr = s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
1072 lc->tu.cu_qp_offset_cb = 0;
1073 lc->tu.cu_qp_offset_cr = 0;
1075 lc->tu.is_cu_chroma_qp_offset_coded = 1;
1078 if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1079 if (lc->tu.intra_pred_mode >= 6 &&
1080 lc->tu.intra_pred_mode <= 14) {
1081 scan_idx = SCAN_VERT;
1082 } else if (lc->tu.intra_pred_mode >= 22 &&
1083 lc->tu.intra_pred_mode <= 30) {
1084 scan_idx = SCAN_HORIZ;
1087 if (lc->tu.intra_pred_mode_c >= 6 &&
1088 lc->tu.intra_pred_mode_c <= 14) {
1089 scan_idx_c = SCAN_VERT;
1090 } else if (lc->tu.intra_pred_mode_c >= 22 &&
1091 lc->tu.intra_pred_mode_c <= 30) {
1092 scan_idx_c = SCAN_HORIZ;
1096 lc->tu.cross_pf = 0;
1099 ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1100 if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1101 int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1102 int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1103 lc->tu.cross_pf = (s->ps.pps->cross_component_prediction_enabled_flag && cbf_luma &&
1104 (lc->cu.pred_mode == MODE_INTER ||
1105 (lc->tu.chroma_mode_c == 4)));
1107 if (lc->tu.cross_pf) {
1108 hls_cross_component_pred(s, 0);
1110 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1111 if (lc->cu.pred_mode == MODE_INTRA) {
1112 ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1113 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
1116 ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1117 log2_trafo_size_c, scan_idx_c, 1);
1119 if (lc->tu.cross_pf) {
1120 ptrdiff_t stride = s->frame->linesize[1];
1121 int hshift = s->ps.sps->hshift[1];
1122 int vshift = s->ps.sps->vshift[1];
1123 int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1124 int16_t *coeffs = (int16_t*)lc->edge_emu_buffer2;
1125 int size = 1 << log2_trafo_size_c;
1127 uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1128 ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1129 for (i = 0; i < (size * size); i++) {
1130 coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1132 s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
1136 if (lc->tu.cross_pf) {
1137 hls_cross_component_pred(s, 1);
1139 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1140 if (lc->cu.pred_mode == MODE_INTRA) {
1141 ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1142 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1145 ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1146 log2_trafo_size_c, scan_idx_c, 2);
1148 if (lc->tu.cross_pf) {
1149 ptrdiff_t stride = s->frame->linesize[2];
1150 int hshift = s->ps.sps->hshift[2];
1151 int vshift = s->ps.sps->vshift[2];
1152 int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1153 int16_t *coeffs = (int16_t*)lc->edge_emu_buffer2;
1154 int size = 1 << log2_trafo_size_c;
1156 uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1157 ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1158 for (i = 0; i < (size * size); i++) {
1159 coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1161 s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
1164 } else if (s->ps.sps->chroma_format_idc && blk_idx == 3) {
1165 int trafo_size_h = 1 << (log2_trafo_size + 1);
1166 int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1167 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1168 if (lc->cu.pred_mode == MODE_INTRA) {
1169 ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1170 trafo_size_h, trafo_size_v);
1171 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1174 ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1175 log2_trafo_size, scan_idx_c, 1);
1177 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1178 if (lc->cu.pred_mode == MODE_INTRA) {
1179 ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1180 trafo_size_h, trafo_size_v);
1181 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1184 ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1185 log2_trafo_size, scan_idx_c, 2);
1188 } else if (s->ps.sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
1189 if (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3) {
1190 int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1191 int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1192 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1193 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1194 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1195 if (s->ps.sps->chroma_format_idc == 2) {
1196 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1197 trafo_size_h, trafo_size_v);
1198 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1199 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1201 } else if (blk_idx == 3) {
1202 int trafo_size_h = 1 << (log2_trafo_size + 1);
1203 int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1204 ff_hevc_set_neighbour_available(s, xBase, yBase,
1205 trafo_size_h, trafo_size_v);
1206 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1207 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1208 if (s->ps.sps->chroma_format_idc == 2) {
1209 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1210 trafo_size_h, trafo_size_v);
1211 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1212 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
1220 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1222 int cb_size = 1 << log2_cb_size;
1223 int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1225 int min_pu_width = s->ps.sps->min_pu_width;
1226 int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1227 int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1230 for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1231 for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1232 s->is_pcm[i + j * min_pu_width] = 2;
/**
 * Recursively parse one transform_tree() node (HEVC spec 7.3.8.8) and, at
 * leaf nodes, decode/reconstruct its transform unit via hls_transform_unit().
 *
 * (x0, y0) is the position of this node, (xBase, yBase) that of the parent
 * node and (cb_xBase, cb_yBase) that of the enclosing coding block.
 * trafo_depth is the current split depth and blk_idx the index of this node
 * within its parent. base_cbf_cb/base_cbf_cr are the chroma coded-block
 * flags inherited from the parent (two entries each; the second entry is
 * only meaningful for 4:2:2 chroma).
 * Returns 0 on success, a negative AVERROR code on failure.
 */
1235 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1236 int xBase, int yBase, int cb_xBase, int cb_yBase,
1237 int log2_cb_size, int log2_trafo_size,
1238 int trafo_depth, int blk_idx,
1239 const int *base_cbf_cb, const int *base_cbf_cr)
1241 HEVCLocalContext *lc = s->HEVClc;
1242 uint8_t split_transform_flag;
/* Start from the chroma CBFs of the parent node; they may be refined below. */
1247 cbf_cb[0] = base_cbf_cb[0];
1248 cbf_cb[1] = base_cbf_cb[1];
1249 cbf_cr[0] = base_cbf_cr[0];
1250 cbf_cr[1] = base_cbf_cr[1];
/* With intra NxN partitioning each quadrant carries its own luma intra mode
 * (and, for 4:4:4, its own chroma mode); pick it up at split depth 1. */
1252 if (lc->cu.intra_split_flag) {
1253 if (trafo_depth == 1) {
1254 lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1255 if (s->ps.sps->chroma_format_idc == 3) {
1256 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1257 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[blk_idx];
1259 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1260 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0];
1264 lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[0];
1265 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1266 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0];
/* split_transform_flag is explicitly coded only when the node size lies
 * strictly inside the [min_tb, max_trafo] range and further splitting is
 * both allowed and not already forced. Otherwise it is inferred. */
1269 if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1270 log2_trafo_size > s->ps.sps->log2_min_tb_size &&
1271 trafo_depth < lc->cu.max_trafo_depth &&
1272 !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1273 split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1275 int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1276 lc->cu.pred_mode == MODE_INTER &&
1277 lc->cu.part_mode != PART_2Nx2N &&
/* Inferred value: split when the node is larger than the max transform
 * size, when intra NxN forces a split, or for the inter_split case. */
1280 split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1281 (lc->cu.intra_split_flag && trafo_depth == 0) ||
/* Chroma CBFs: present unless chroma is absent or the chroma block would be
 * smaller than 4x4 (log2_trafo_size == 2 with subsampled chroma).
 * For 4:2:2 a second CBF covers the lower half of the chroma block. */
1285 if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1286 if (trafo_depth == 0 || cbf_cb[0]) {
1287 cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1288 if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1289 cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1293 if (trafo_depth == 0 || cbf_cr[0]) {
1294 cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1295 if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1296 cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
/* Recurse into the four child nodes (quadtree split). */
1301 if (split_transform_flag) {
1302 const int trafo_size_split = 1 << (log2_trafo_size - 1);
1303 const int x1 = x0 + trafo_size_split;
1304 const int y1 = y0 + trafo_size_split;
1306 #define SUBDIVIDE(x, y, idx) \
1308 ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1309 log2_trafo_size - 1, trafo_depth + 1, idx, \
1315 SUBDIVIDE(x0, y0, 0);
1316 SUBDIVIDE(x1, y0, 1);
1317 SUBDIVIDE(x0, y1, 2);
1318 SUBDIVIDE(x1, y1, 3);
/* Leaf node: decode cbf_luma (when not inferred to 1) and the TU itself. */
1322 int min_tu_size = 1 << s->ps.sps->log2_min_tb_size;
1323 int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1324 int min_tu_width = s->ps.sps->min_tb_width;
/* cbf_luma is coded except for an inter root node with no chroma CBF set,
 * in which case it is inferred to be 1 (spec 7.4.9.8). */
1327 if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1328 cbf_cb[0] || cbf_cr[0] ||
1329 (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1330 cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1333 ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1334 log2_cb_size, log2_trafo_size,
1335 blk_idx, cbf_luma, cbf_cb, cbf_cr);
1338 // TODO: store cbf_luma somewhere else
/* Record cbf_luma in the per-min-TU map used by the deblocking filter. */
1341 for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1342 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1343 int x_tu = (x0 + j) >> log2_min_tu_size;
1344 int y_tu = (y0 + i) >> log2_min_tu_size;
1345 s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1348 if (!s->sh.disable_deblocking_filter_flag) {
1349 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
/* Transquant-bypass blocks must not be deblocked (lossless coding). */
1350 if (s->ps.pps->transquant_bypass_enable_flag &&
1351 lc->cu.cu_transquant_bypass_flag)
1352 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
/**
 * Decode one pcm_sample() block (HEVC spec 7.3.8.7): raw, uncompressed
 * samples are read from the byte-aligned bitstream and copied directly into
 * the current frame for luma and (if present) both chroma planes.
 * NOTE(review): the error-return paths (checking init_get_bits()/skip_bytes()
 * results) and the final return statement are not visible in this extract.
 */
1358 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1360 HEVCLocalContext *lc = s->HEVClc;
1362 int cb_size = 1 << log2_cb_size;
1363 ptrdiff_t stride0 = s->frame->linesize[0];
1364 ptrdiff_t stride1 = s->frame->linesize[1];
1365 ptrdiff_t stride2 = s->frame->linesize[2];
/* Destination pointers into the three planes, adjusted for chroma
 * subsampling and for >8-bit formats via pixel_shift. */
1366 uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1367 uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1368 uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
/* Total payload size in bits: luma samples at pcm.bit_depth plus both
 * subsampled chroma planes at pcm.bit_depth_chroma. */
1370 int length = cb_size * cb_size * s->ps.sps->pcm.bit_depth +
1371 (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) +
1372 ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) *
1373 s->ps.sps->pcm.bit_depth_chroma;
/* Advance the CABAC engine past the byte-aligned PCM payload. */
1374 const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
/* Boundary strengths are still computed: PCM blocks may be deblocked
 * depending on pcm_loop_filter_disable (handled elsewhere). */
1377 if (!s->sh.disable_deblocking_filter_flag)
1378 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1380 ret = init_get_bits(&gb, pcm, length);
1384 s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size, &gb, s->ps.sps->pcm.bit_depth);
1385 if (s->ps.sps->chroma_format_idc) {
1386 s->hevcdsp.put_pcm(dst1, stride1,
1387 cb_size >> s->ps.sps->hshift[1],
1388 cb_size >> s->ps.sps->vshift[1],
1389 &gb, s->ps.sps->pcm.bit_depth_chroma);
1390 s->hevcdsp.put_pcm(dst2, stride2,
1391 cb_size >> s->ps.sps->hshift[2],
1392 cb_size >> s->ps.sps->vshift[2],
1393 &gb, s->ps.sps->pcm.bit_depth_chroma);
1400 * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1402 * @param s HEVC decoding context
1403 * @param dst target buffer for block data at block position
1404 * @param dststride stride of the dst buffer
1405 * @param ref reference picture buffer at origin (0, 0)
1406 * @param mv motion vector (relative to block position) to get pixel data from
1407 * @param x_off horizontal position of block from origin (0, 0)
1408 * @param y_off vertical position of block from origin (0, 0)
1409 * @param block_w width of block
1410 * @param block_h height of block
1411 * @param luma_weight weighting factor applied to the luma prediction
1412 * @param luma_offset additive offset applied to the luma prediction value
/* Unidirectional luma motion compensation; parameters are documented in the
 * block comment above. Writes block_w x block_h predicted samples to dst. */
1415 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1416 AVFrame *ref, const Mv *mv, int x_off, int y_off,
1417 int block_w, int block_h, int luma_weight, int luma_offset)
1419 HEVCLocalContext *lc = s->HEVClc;
1420 uint8_t *src = ref->data[0];
1421 ptrdiff_t srcstride = ref->linesize[0];
1422 int pic_width = s->ps.sps->width;
1423 int pic_height = s->ps.sps->height;
/* Explicit weighted prediction applies to P slices with weighted_pred_flag
 * and B slices with weighted_bipred_flag. */
1426 int weight_flag = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1427 (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1428 int idx = ff_hevc_pel_weight[block_w];
/* Integer part of the quarter-pel motion vector selects the source block;
 * the fractional part (mx/my, computed on elided lines) selects the filter. */
1430 x_off += mv->x >> 2;
1431 y_off += mv->y >> 2;
1432 src += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
/* If the interpolation window (block + QPEL_EXTRA filter taps) reaches
 * outside the picture, build a padded copy via emulated_edge_mc. */
1434 if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1435 x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1436 y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1437 const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1438 int offset = QPEL_EXTRA_BEFORE * srcstride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1439 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1441 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1442 edge_emu_stride, srcstride,
1443 block_w + QPEL_EXTRA,
1444 block_h + QPEL_EXTRA,
1445 x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1446 pic_width, pic_height);
1447 src = lc->edge_emu_buffer + buf_offset;
1448 srcstride = edge_emu_stride;
/* NOTE(review): the `if (!weight_flag)` / `else` guard lines selecting
 * between the two DSP calls below appear to be elided from this extract. */
1452 s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1453 block_h, mx, my, block_w);
1455 s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1456 block_h, s->sh.luma_log2_weight_denom,
1457 luma_weight, luma_offset, mx, my, block_w);
1461 * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1463 * @param s HEVC decoding context
1464 * @param dst target buffer for block data at block position
1465 * @param dststride stride of the dst buffer
1466 * @param ref0 reference picture0 buffer at origin (0, 0)
1467 * @param mv0 motion vector0 (relative to block position) to get pixel data from
1468 * @param x_off horizontal position of block from origin (0, 0)
1469 * @param y_off vertical position of block from origin (0, 0)
1470 * @param block_w width of block
1471 * @param block_h height of block
1472 * @param ref1 reference picture1 buffer at origin (0, 0)
1473 * @param mv1 motion vector1 (relative to block position) to get pixel data from
1474 * @param current_mv current motion vector structure
/* Bidirectional luma motion compensation; parameters are documented in the
 * block comment above. The L0 prediction goes to the intermediate buffer
 * lc->tmp, then the L1 pass averages (or weights) it into dst. */
1476 static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1477 AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1478 int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1480 HEVCLocalContext *lc = s->HEVClc;
1481 ptrdiff_t src0stride = ref0->linesize[0];
1482 ptrdiff_t src1stride = ref1->linesize[0];
1483 int pic_width = s->ps.sps->width;
1484 int pic_height = s->ps.sps->height;
/* Fractional (quarter-pel) MV parts select the interpolation filters. */
1485 int mx0 = mv0->x & 3;
1486 int my0 = mv0->y & 3;
1487 int mx1 = mv1->x & 3;
1488 int my1 = mv1->y & 3;
1489 int weight_flag = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1490 (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
/* Integer MV parts give the source block positions in each reference. */
1491 int x_off0 = x_off + (mv0->x >> 2);
1492 int y_off0 = y_off + (mv0->y >> 2);
1493 int x_off1 = x_off + (mv1->x >> 2);
1494 int y_off1 = y_off + (mv1->y >> 2);
1495 int idx = ff_hevc_pel_weight[block_w];
1497 uint8_t *src0 = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1498 uint8_t *src1 = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
/* Pad the L0 source through emulated_edge_mc when the filter window leaves
 * the picture. */
1500 if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1501 x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1502 y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1503 const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1504 int offset = QPEL_EXTRA_BEFORE * src0stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1505 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1507 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1508 edge_emu_stride, src0stride,
1509 block_w + QPEL_EXTRA,
1510 block_h + QPEL_EXTRA,
1511 x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1512 pic_width, pic_height);
1513 src0 = lc->edge_emu_buffer + buf_offset;
1514 src0stride = edge_emu_stride;
/* Same edge handling for the L1 source, using the second scratch buffer. */
1517 if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1518 x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1519 y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1520 const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1521 int offset = QPEL_EXTRA_BEFORE * src1stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1522 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1524 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1525 edge_emu_stride, src1stride,
1526 block_w + QPEL_EXTRA,
1527 block_h + QPEL_EXTRA,
1528 x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1529 pic_width, pic_height);
1530 src1 = lc->edge_emu_buffer2 + buf_offset;
1531 src1stride = edge_emu_stride;
/* First hypothesis (L0) into lc->tmp at intermediate precision. */
1534 s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
1535 block_h, mx0, my0, block_w);
/* NOTE(review): the `if (!weight_flag)` / `else` guard lines selecting
 * between the plain and weighted bi-prediction calls appear elided here. */
1537 s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1538 block_h, mx1, my1, block_w);
1540 s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1541 block_h, s->sh.luma_log2_weight_denom,
1542 s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1543 s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1544 s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1545 s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1551 * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1553 * @param s HEVC decoding context
 * @param dst0 target buffer for block data of one chroma plane at block position
 * @param dststride stride of the dst0 buffer
 * @param src0 reference chroma plane buffer at origin (0, 0)
 * @param srcstride stride of the src0 buffer
 * @param reflist reference picture list index (0 = L0, 1 = L1) selecting the
 *                motion vector taken from current_mv
1559 * @param x_off horizontal position of block from origin (0, 0)
1560 * @param y_off vertical position of block from origin (0, 0)
1561 * @param block_w width of block
1562 * @param block_h height of block
1563 * @param chroma_weight weighting factor applied to the chroma prediction
1564 * @param chroma_offset additive offset applied to the chroma prediction value
1567 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1568 ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1569 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1571 HEVCLocalContext *lc = s->HEVClc;
1572 int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1];
1573 int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1];
1574 const Mv *mv = ¤t_mv->mv[reflist];
1575 int weight_flag = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1576 (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1577 int idx = ff_hevc_pel_weight[block_w];
1578 int hshift = s->ps.sps->hshift[1];
1579 int vshift = s->ps.sps->vshift[1];
1580 intptr_t mx = av_mod_uintp2(mv->x, 2 + hshift);
1581 intptr_t my = av_mod_uintp2(mv->y, 2 + vshift);
1582 intptr_t _mx = mx << (1 - hshift);
1583 intptr_t _my = my << (1 - vshift);
1585 x_off += mv->x >> (2 + hshift);
1586 y_off += mv->y >> (2 + vshift);
1587 src0 += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1589 if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1590 x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1591 y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1592 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1593 int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->ps.sps->pixel_shift));
1594 int buf_offset0 = EPEL_EXTRA_BEFORE *
1595 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1596 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1597 edge_emu_stride, srcstride,
1598 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1599 x_off - EPEL_EXTRA_BEFORE,
1600 y_off - EPEL_EXTRA_BEFORE,
1601 pic_width, pic_height);
1603 src0 = lc->edge_emu_buffer + buf_offset0;
1604 srcstride = edge_emu_stride;
1607 s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1608 block_h, _mx, _my, block_w);
1610 s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1611 block_h, s->sh.chroma_log2_weight_denom,
1612 chroma_weight, chroma_offset, _mx, _my, block_w);
1616 * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1618 * @param s HEVC decoding context
1619 * @param dst target buffer for block data at block position
1620 * @param dststride stride of the dst buffer
1621 * @param ref0 reference picture0 buffer at origin (0, 0)
1622 * @param mv0 motion vector0 (relative to block position) to get pixel data from
1623 * @param x_off horizontal position of block from origin (0, 0)
1624 * @param y_off vertical position of block from origin (0, 0)
1625 * @param block_w width of block
1626 * @param block_h height of block
1627 * @param ref1 reference picture1 buffer at origin (0, 0)
1628 * @param mv1 motion vector1 (relative to block position) to get pixel data from
1629 * @param current_mv current motion vector structure
1630 * @param cidx chroma component(cb, cr)
1632 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1633 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1635 HEVCLocalContext *lc = s->HEVClc;
1636 uint8_t *src1 = ref0->data[cidx+1];
1637 uint8_t *src2 = ref1->data[cidx+1];
1638 ptrdiff_t src1stride = ref0->linesize[cidx+1];
1639 ptrdiff_t src2stride = ref1->linesize[cidx+1];
1640 int weight_flag = (s->sh.slice_type == HEVC_SLICE_P && s->ps.pps->weighted_pred_flag) ||
1641 (s->sh.slice_type == HEVC_SLICE_B && s->ps.pps->weighted_bipred_flag);
1642 int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1];
1643 int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1];
1644 Mv *mv0 = ¤t_mv->mv[0];
1645 Mv *mv1 = ¤t_mv->mv[1];
1646 int hshift = s->ps.sps->hshift[1];
1647 int vshift = s->ps.sps->vshift[1];
1649 intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift);
1650 intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift);
1651 intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift);
1652 intptr_t my1 = av_mod_uintp2(mv1->y, 2 + vshift);
1653 intptr_t _mx0 = mx0 << (1 - hshift);
1654 intptr_t _my0 = my0 << (1 - vshift);
1655 intptr_t _mx1 = mx1 << (1 - hshift);
1656 intptr_t _my1 = my1 << (1 - vshift);
1658 int x_off0 = x_off + (mv0->x >> (2 + hshift));
1659 int y_off0 = y_off + (mv0->y >> (2 + vshift));
1660 int x_off1 = x_off + (mv1->x >> (2 + hshift));
1661 int y_off1 = y_off + (mv1->y >> (2 + vshift));
1662 int idx = ff_hevc_pel_weight[block_w];
1663 src1 += y_off0 * src1stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1664 src2 += y_off1 * src2stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1666 if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1667 x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1668 y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1669 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1670 int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1671 int buf_offset1 = EPEL_EXTRA_BEFORE *
1672 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1674 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1675 edge_emu_stride, src1stride,
1676 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1677 x_off0 - EPEL_EXTRA_BEFORE,
1678 y_off0 - EPEL_EXTRA_BEFORE,
1679 pic_width, pic_height);
1681 src1 = lc->edge_emu_buffer + buf_offset1;
1682 src1stride = edge_emu_stride;
1685 if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1686 x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1687 y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1688 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1689 int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1690 int buf_offset1 = EPEL_EXTRA_BEFORE *
1691 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1693 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1694 edge_emu_stride, src2stride,
1695 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1696 x_off1 - EPEL_EXTRA_BEFORE,
1697 y_off1 - EPEL_EXTRA_BEFORE,
1698 pic_width, pic_height);
1700 src2 = lc->edge_emu_buffer2 + buf_offset1;
1701 src2stride = edge_emu_stride;
1704 s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1705 block_h, _mx0, _my0, block_w);
1707 s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1708 src2, src2stride, lc->tmp,
1709 block_h, _mx1, _my1, block_w);
1711 s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1712 src2, src2stride, lc->tmp,
1714 s->sh.chroma_log2_weight_denom,
1715 s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1716 s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1717 s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1718 s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1719 _mx1, _my1, block_w);
1722 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1723 const Mv *mv, int y0, int height)
1725 if (s->threads_type == FF_THREAD_FRAME ) {
1726 int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1728 ff_thread_await_progress(&ref->tf, y, 0);
/**
 * Parse the non-merge (AMVP) motion data of one prediction unit and fill
 * *mv with the reconstructed motion vectors, reference indices and
 * prediction flags for list 0 and/or list 1.
 * The parsing order below (ref_idx, mvd, mvp flag) follows the CABAC
 * syntax of prediction_unit() and must not be reordered.
 */
1732 static void hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
1733 int nPbH, int log2_cb_size, int part_idx,
1734 int merge_idx, MvField *mv)
1736 HEVCLocalContext *lc = s->HEVClc;
1737 enum InterPredIdc inter_pred_idc = PRED_L0;
1740 ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
/* P slices are L0-only; only B slices code inter_pred_idc. */
1742 if (s->sh.slice_type == HEVC_SLICE_B)
1743 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
/* List 0 (used for PRED_L0 and PRED_BI). */
1745 if (inter_pred_idc != PRED_L1) {
1746 if (s->sh.nb_refs[L0])
1747 mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1749 mv->pred_flag = PF_L0;
1750 ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1751 mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
/* Predictor selected by mvp_flag, then refined by the decoded MVD. */
1752 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1753 part_idx, merge_idx, mv, mvp_flag, 0);
1754 mv->mv[0].x += lc->pu.mvd.x;
1755 mv->mv[0].y += lc->pu.mvd.y;
/* List 1 (used for PRED_L1 and PRED_BI). */
1758 if (inter_pred_idc != PRED_L0) {
1759 if (s->sh.nb_refs[L1])
1760 mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
/* With mvd_l1_zero_flag, bi-predicted PUs infer a zero L1 MVD instead of
 * parsing one. */
1762 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1763 AV_ZERO32(&lc->pu.mvd);
1765 ff_hevc_hls_mvd_coding(s, x0, y0, 1);
/* += turns PF_L0 into PF_BI when both lists are present. */
1768 mv->pred_flag += PF_L1;
1769 mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1770 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1771 part_idx, merge_idx, mv, mvp_flag, 1);
1772 mv->mv[1].x += lc->pu.mvd.x;
1773 mv->mv[1].y += lc->pu.mvd.y;
1777 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1779 int log2_cb_size, int partIdx, int idx)
1781 #define POS(c_idx, x, y) \
1782 &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1783 (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1784 HEVCLocalContext *lc = s->HEVClc;
1786 struct MvField current_mv = {{{ 0 }}};
1788 int min_pu_width = s->ps.sps->min_pu_width;
1790 MvField *tab_mvf = s->ref->tab_mvf;
1791 RefPicList *refPicList = s->ref->refPicList;
1792 HEVCFrame *ref0 = NULL, *ref1 = NULL;
1793 uint8_t *dst0 = POS(0, x0, y0);
1794 uint8_t *dst1 = POS(1, x0, y0);
1795 uint8_t *dst2 = POS(2, x0, y0);
1796 int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1797 int min_cb_width = s->ps.sps->min_cb_width;
1798 int x_cb = x0 >> log2_min_cb_size;
1799 int y_cb = y0 >> log2_min_cb_size;
1803 int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1806 lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1808 if (skip_flag || lc->pu.merge_flag) {
1809 if (s->sh.max_num_merge_cand > 1)
1810 merge_idx = ff_hevc_merge_idx_decode(s);
1814 ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1815 partIdx, merge_idx, ¤t_mv);
1817 hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1818 partIdx, merge_idx, ¤t_mv);
1821 x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1822 y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1824 for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1825 for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1826 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1828 if (current_mv.pred_flag & PF_L0) {
1829 ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1832 hevc_await_progress(s, ref0, ¤t_mv.mv[0], y0, nPbH);
1834 if (current_mv.pred_flag & PF_L1) {
1835 ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1838 hevc_await_progress(s, ref1, ¤t_mv.mv[1], y0, nPbH);
1841 if (current_mv.pred_flag == PF_L0) {
1842 int x0_c = x0 >> s->ps.sps->hshift[1];
1843 int y0_c = y0 >> s->ps.sps->vshift[1];
1844 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1845 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1847 luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1848 ¤t_mv.mv[0], x0, y0, nPbW, nPbH,
1849 s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1850 s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1852 if (s->ps.sps->chroma_format_idc) {
1853 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1854 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1855 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1856 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1857 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1858 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1860 } else if (current_mv.pred_flag == PF_L1) {
1861 int x0_c = x0 >> s->ps.sps->hshift[1];
1862 int y0_c = y0 >> s->ps.sps->vshift[1];
1863 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1864 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1866 luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1867 ¤t_mv.mv[1], x0, y0, nPbW, nPbH,
1868 s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1869 s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1871 if (s->ps.sps->chroma_format_idc) {
1872 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1873 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1874 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1876 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1877 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1878 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1880 } else if (current_mv.pred_flag == PF_BI) {
1881 int x0_c = x0 >> s->ps.sps->hshift[1];
1882 int y0_c = y0 >> s->ps.sps->vshift[1];
1883 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1884 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1886 luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1887 ¤t_mv.mv[0], x0, y0, nPbW, nPbH,
1888 ref1->frame, ¤t_mv.mv[1], ¤t_mv);
1890 if (s->ps.sps->chroma_format_idc) {
1891 chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1892 x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 0);
1894 chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1895 x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 1);
/* Derive the luma intra prediction mode for one prediction unit from the
 * three Most Probable Mode (MPM) candidates (H.265 §8.4.2), then record the
 * chosen mode into tab_ipm and mark the covered PUs as intra in tab_mvf.
 * Returns the derived intra prediction mode. */
1903 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1904                                 int prev_intra_luma_pred_flag)
1906     HEVCLocalContext *lc = s->HEVClc;
1907     int x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1908     int y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1909     int min_pu_width = s->ps.sps->min_pu_width;
1910     int size_in_pus = pu_size >> s->ps.sps->log2_min_pu_size;
1911     int x0b = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
1912     int y0b = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
     /* Neighbour modes default to INTRA_DC when the neighbour CTB is
      * unavailable and the PU sits on the CTB edge. */
1914     int cand_up   = (lc->ctb_up_flag || y0b) ?
1915                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1916     int cand_left = (lc->ctb_left_flag || x0b) ?
1917                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1] : INTRA_DC;
1919     int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1921     MvField *tab_mvf = s->ref->tab_mvf;
1922     int intra_pred_mode;
1926     // intra_pred_mode prediction does not cross vertical CTB boundaries
1927     if ((y0 - 1) < y_ctb)
     /* Build the MPM candidate list. */
1930     if (cand_left == cand_up) {
1931         if (cand_left < 2) {
             /* Both neighbours planar/DC: use the fixed default list. */
1932             candidate[0] = INTRA_PLANAR;
1933             candidate[1] = INTRA_DC;
1934             candidate[2] = INTRA_ANGULAR_26;
             /* Angular neighbour: left mode plus its two adjacent angular
              * modes, wrapping within the 32 angular directions. */
1936             candidate[0] = cand_left;
1937             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1938             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
         /* Distinct neighbours: both go in, third is the first of
          * planar/DC/angular-26 not already present. */
1941         candidate[0] = cand_left;
1942         candidate[1] = cand_up;
1943         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1944             candidate[2] = INTRA_PLANAR;
1945         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1946             candidate[2] = INTRA_DC;
1948             candidate[2] = INTRA_ANGULAR_26;
1952     if (prev_intra_luma_pred_flag) {
         /* Mode is signalled as an index into the MPM list. */
1953         intra_pred_mode = candidate[lc->pu.mpm_idx];
         /* Otherwise: sort the candidates, then remap the remainder index
          * past every candidate it is >= to (spec's rem_intra derivation). */
1955         if (candidate[0] > candidate[1])
1956             FFSWAP(uint8_t, candidate[0], candidate[1]);
1957         if (candidate[0] > candidate[2])
1958             FFSWAP(uint8_t, candidate[0], candidate[2]);
1959         if (candidate[1] > candidate[2])
1960             FFSWAP(uint8_t, candidate[1], candidate[2]);
1962         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1963         for (i = 0; i < 3; i++)
1964             if (intra_pred_mode >= candidate[i])
1968     /* write the intra prediction units into the mv array */
1971     for (i = 0; i < size_in_pus; i++) {
1972         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1973                intra_pred_mode, size_in_pus);
1975         for (j = 0; j < size_in_pus; j++) {
1976             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
1980     return intra_pred_mode;
/* Record the coding-tree depth ct_depth for every min-CB-sized cell covered
 * by the CB at (x0, y0) of size 2^log2_cb_size, one tab_ct_depth row per
 * memset. Used later for split-flag context derivation. */
1983 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1984                                           int log2_cb_size, int ct_depth)
1986     int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
1987     int x_cb = x0 >> s->ps.sps->log2_min_cb_size;
1988     int y_cb = y0 >> s->ps.sps->log2_min_cb_size;
1991     for (y = 0; y < length; y++)
1992         memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
/* Remapping of the luma intra prediction mode to the chroma mode used for
 * 4:2:2 content (chroma_format_idc == 2), indexed by luma mode. */
1996 static const uint8_t tab_mode_idx[] = {
1997      0,  1,  2,  2,  2,  2,  3,  5,  7,  8, 10, 12, 13, 15, 17, 18, 19, 20,
1998     21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31};
/* Parse the intra prediction syntax for a CU: per-PU luma prediction mode
 * (one PU for PART_2Nx2N, four for PART_NxN) followed by the chroma mode,
 * whose derivation depends on chroma_format_idc (4:4:4 / 4:2:2 / 4:2:0). */
2000 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
2003     HEVCLocalContext *lc = s->HEVClc;
     /* Chroma modes signalled by intra_chroma_pred_mode 0..3 (4 = same as
      * luma, "derived"). */
2004     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
2005     uint8_t prev_intra_luma_pred_flag[4];
2006     int split   = lc->cu.part_mode == PART_NxN;
2007     int pb_size = (1 << log2_cb_size) >> split;
2008     int side    = split + 1;
     /* First pass: read all prev_intra_luma_pred_flag bits (CABAC order),
      * then a second pass reads the per-PU mode indices. */
2012     for (i = 0; i < side; i++)
2013         for (j = 0; j < side; j++)
2014             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
2016     for (i = 0; i < side; i++) {
2017         for (j = 0; j < side; j++) {
2018             if (prev_intra_luma_pred_flag[2 * i + j])
2019                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
2021                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
2023             lc->pu.intra_pred_mode[2 * i + j] =
2024                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
2025                                      prev_intra_luma_pred_flag[2 * i + j]);
     /* 4:4:4: one chroma mode per PU; collisions with the table entry map
      * to angular mode 34. */
2029     if (s->ps.sps->chroma_format_idc == 3) {
2030         for (i = 0; i < side; i++) {
2031             for (j = 0; j < side; j++) {
2032                 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2033                 if (chroma_mode != 4) {
2034                     if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
2035                         lc->pu.intra_pred_mode_c[2 * i + j] = 34;
2037                         lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
2039                     lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
     /* 4:2:2: single chroma mode, remapped through tab_mode_idx. */
2043     } else if (s->ps.sps->chroma_format_idc == 2) {
2045         lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2046         if (chroma_mode != 4) {
2047             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2050                 mode_idx = intra_chroma_table[chroma_mode];
2052             mode_idx = lc->pu.intra_pred_mode[0];
2054         lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
     /* 4:2:0 (any non-monochrome remainder): single chroma mode, direct. */
2055     } else if (s->ps.sps->chroma_format_idc != 0) {
2056         chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
2057         if (chroma_mode != 4) {
2058             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
2059                 lc->pu.intra_pred_mode_c[0] = 34;
2061                 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
2063             lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
/* Fill default values for a CU that carries no explicit intra syntax
 * (e.g. skip/PCM): set tab_ipm to INTRA_DC over the CB area, and when the
 * CU is intra also flag every covered PU as PF_INTRA in tab_mvf. */
2068 static void intra_prediction_unit_default_value(HEVCContext *s,
2072     HEVCLocalContext *lc = s->HEVClc;
2073     int pb_size          = 1 << log2_cb_size;
2074     int size_in_pus      = pb_size >> s->ps.sps->log2_min_pu_size;
2075     int min_pu_width     = s->ps.sps->min_pu_width;
2076     MvField *tab_mvf     = s->ref->tab_mvf;
2077     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
2078     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
     /* CB smaller than one min PU: clamp so the memset below has width. */
2081     if (size_in_pus == 0)
2083     for (j = 0; j < size_in_pus; j++)
2084         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2085     if (lc->cu.pred_mode == MODE_INTRA)
2086         for (j = 0; j < size_in_pus; j++)
2087             for (k = 0; k < size_in_pus; k++)
2088                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
/* Parse and decode one coding unit at (x0, y0) of size 2^log2_cb_size:
 * skip flag, prediction mode, partitioning, intra/inter prediction units,
 * optional PCM, residual transform tree, deblocking strengths and the
 * per-CB QP bookkeeping. Corresponds to coding_unit() in the spec. */
2091 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2093     int cb_size          = 1 << log2_cb_size;
2094     HEVCLocalContext *lc = s->HEVClc;
2095     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
2096     int length           = cb_size >> log2_min_cb_size;
2097     int min_cb_width     = s->ps.sps->min_cb_width;
2098     int x_cb             = x0 >> log2_min_cb_size;
2099     int y_cb             = y0 >> log2_min_cb_size;
2100     int idx              = log2_cb_size - 2;
2101     int qp_block_mask    = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
     /* Defaults before any syntax is parsed. */
2106     lc->cu.pred_mode        = MODE_INTRA;
2107     lc->cu.part_mode        = PART_2Nx2N;
2108     lc->cu.intra_split_flag = 0;
2110     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2111     for (x = 0; x < 4; x++)
2112         lc->pu.intra_pred_mode[x] = 1;
2113     if (s->ps.pps->transquant_bypass_enable_flag) {
2114         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2115         if (lc->cu.cu_transquant_bypass_flag)
2116             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2118         lc->cu.cu_transquant_bypass_flag = 0;
     /* Skip flag only exists for P/B slices; propagate it over the whole
      * CB area of the skip_flag map. */
2120     if (s->sh.slice_type != HEVC_SLICE_I) {
2121         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2123         x = y_cb * min_cb_width + x_cb;
2124         for (y = 0; y < length; y++) {
2125             memset(&s->skip_flag[x], skip_flag, length);
2128         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2130         x = y_cb * min_cb_width + x_cb;
2131         for (y = 0; y < length; y++) {
2132             memset(&s->skip_flag[x], 0, length);
     /* Skipped CU: one 2Nx2N merge PU, no residual. */
2137     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2138         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2139         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2141         if (!s->sh.disable_deblocking_filter_flag)
2142             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2146         if (s->sh.slice_type != HEVC_SLICE_I)
2147             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
         /* part_mode is only coded for inter CUs or minimum-size intra CUs;
          * otherwise it stays at the PART_2Nx2N default. */
2148         if (lc->cu.pred_mode != MODE_INTRA ||
2149             log2_cb_size == s->ps.sps->log2_min_cb_size) {
2150             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2151             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2152                                       lc->cu.pred_mode == MODE_INTRA;
2155         if (lc->cu.pred_mode == MODE_INTRA) {
             /* PCM path: raw samples, optionally bypassing the loop filter. */
2156             if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2157                 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2158                 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2159                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2162                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2163                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2164                 if (s->ps.sps->pcm.loop_filter_disable_flag)
2165                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2170                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2173             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
             /* Inter CU: emit one hls_prediction_unit() per partition; the
              * geometry arguments follow the HEVC partition shapes. */
2174             switch (lc->cu.part_mode) {
2176                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2179                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0, idx);
2180                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2183                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2184                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2187                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0, idx);
2188                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2191                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2192                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1, idx);
2195                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0, idx - 2);
2196                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2199                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2200                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1, idx - 2);
2203                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2204                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2205                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2206                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
             /* Residual: rqt_root_cbf may suppress the transform tree for
              * non-merge inter CUs. */
2212             int rqt_root_cbf = 1;
2214             if (lc->cu.pred_mode != MODE_INTRA &&
2215                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2216                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2219                 const static int cbf[2] = { 0 };
2220                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2221                                          s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2222                                          s->ps.sps->max_transform_hierarchy_depth_inter;
2223                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2225                                          log2_cb_size, 0, 0, cbf, cbf);
2229                 if (!s->sh.disable_deblocking_filter_flag)
2230                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
     /* QP bookkeeping: derive qp_y if no delta was coded, record it over
      * the CB area, and reset qPy_pred at QP-group boundaries. */
2235     if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2236         ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
2238     x = y_cb * min_cb_width + x_cb;
2239     for (y = 0; y < length; y++) {
2240         memset(&s->qp_y_tab[x], lc->qp_y, length);
2244     if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2245        ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2246         lc->qPy_pred = lc->qp_y;
2249     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
/* Recursively parse the coding quadtree: decide (or infer) split_cu_flag,
 * recurse into the four sub-CBs when split, otherwise decode the CU and
 * check end_of_slice_flag at CTB-aligned positions.
 * Returns >0 when more data follows, 0 at end of slice, <0 on error. */
2254 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2255                                int log2_cb_size, int cb_depth)
2257     HEVCLocalContext *lc = s->HEVClc;
2258     const int cb_size    = 1 << log2_cb_size;
2262     lc->ct_depth = cb_depth;
     /* split_cu_flag is only coded when the CB fits in the picture and is
      * above minimum size; otherwise the split is inferred. */
2263     if (x0 + cb_size <= s->ps.sps->width  &&
2264         y0 + cb_size <= s->ps.sps->height &&
2265         log2_cb_size > s->ps.sps->log2_min_cb_size) {
2266         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2268         split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
     /* Reset per-QP-group delta state at the start of each QP group. */
2270     if (s->ps.pps->cu_qp_delta_enabled_flag &&
2271         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2272         lc->tu.is_cu_qp_delta_coded = 0;
2273         lc->tu.cu_qp_delta          = 0;
2276     if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2277         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_chroma_qp_offset_depth) {
2278         lc->tu.is_cu_chroma_qp_offset_coded = 0;
     /* Split: recurse into the four quadrants, skipping quadrants that lie
      * entirely outside the picture. */
2282         int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2283         const int cb_size_split = cb_size >> 1;
2284         const int x1 = x0 + cb_size_split;
2285         const int y1 = y0 + cb_size_split;
2289         more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2293         if (more_data && x1 < s->ps.sps->width) {
2294             more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2298         if (more_data && y1 < s->ps.sps->height) {
2299             more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2303         if (more_data && x1 < s->ps.sps->width &&
2304             y1 < s->ps.sps->height) {
2305             more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
2310         if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2311            ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2312             lc->qPy_pred = lc->qp_y;
         /* More data follows while any sub-CB still lies inside the frame. */
2315             return ((x1 + cb_size_split) < s->ps.sps->width ||
2316                     (y1 + cb_size_split) < s->ps.sps->height);
         /* Leaf: decode the CU, then read end_of_slice_flag when this CB
          * completes a CTB row/column (or hits the picture border). */
2320         ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2323         if ((!((x0 + cb_size) %
2324                (1 << (s->ps.sps->log2_ctb_size))) ||
2325              (x0 + cb_size >= s->ps.sps->width)) &&
2327                (1 << (s->ps.sps->log2_ctb_size))) ||
2328              (y0 + cb_size >= s->ps.sps->height))) {
2329             int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2330             return !end_of_slice_flag;
/* Set up the per-CTB neighbourhood state in the local context before a CTB
 * is decoded: tile/slice boundary flags, end-of-tile x/y limits and the
 * availability of the left/up/up-left/up-right neighbour CTBs. */
2339 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2342     HEVCLocalContext *lc  = s->HEVClc;
2343     int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
2344     int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2345     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2347     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
     /* WPP: each CTB row restarts the QP group; tiles: end_of_tiles_x
      * tracks the right edge of the current tile column. */
2349     if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2350         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2351             lc->first_qp_group = 1;
2352         lc->end_of_tiles_x = s->ps.sps->width;
2353     } else if (s->ps.pps->tiles_enabled_flag) {
2354         if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2355             int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2356             lc->end_of_tiles_x = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2357             lc->first_qp_group = 1;
2360         lc->end_of_tiles_x = s->ps.sps->width;
2363     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
     /* Boundary flags: left/upper neighbours crossing a tile or slice
      * boundary are marked so loop filtering and prediction can honor
      * the loop_filter_across_* settings. */
2365     lc->boundary_flags = 0;
2366     if (s->ps.pps->tiles_enabled_flag) {
2367         if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2368             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2369         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2370             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2371         if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2372             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2373         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2374             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2376         if (ctb_addr_in_slice <= 0)
2377             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2378         if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2379             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
     /* Neighbour availability for prediction/CABAC context derivation. */
2382     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2383     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2384     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2385     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
/* Single-threaded slice decoding entry point (run via avctx->execute):
 * walks CTBs in tile-scan order from the slice start, initialising CABAC
 * and decoding the coding quadtree per CTB, applying in-loop filters as
 * rows complete. On error the failing CTB's slice address is poisoned
 * with -1 so later slices detect the breakage. */
2388 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2390     HEVCContext *s  = avctxt->priv_data;
2391     int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
2395     int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
     /* A dependent slice segment cannot be the first segment of a picture. */
2398     if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2399         av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2400         return AVERROR_INVALIDDATA;
2403     if (s->sh.dependent_slice_segment_flag) {
2404         int prev_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2405         if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2406             av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2407             return AVERROR_INVALIDDATA;
2411     while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2412         int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2414         x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2415         y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2416         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2418         ret = ff_hevc_cabac_init(s, ctb_addr_ts);
2420             s->tab_slice_address[ctb_addr_rs] = -1;
2424         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2426         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2427         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2428         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2430         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2431         if (more_data < 0) {
2432             s->tab_slice_address[ctb_addr_rs] = -1;
2438         ff_hevc_save_states(s, ctb_addr_ts);
2439         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
     /* Last CTB of the picture: run the filters on the final region. */
2442     if (x_ctb + ctb_size >= s->ps.sps->width &&
2443         y_ctb + ctb_size >= s->ps.sps->height)
2444         ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
/* Decode the slice data on a single thread by dispatching hls_decode_entry
 * through the avctx->execute() API (one job). */
2449 static int hls_slice_data(HEVCContext *s)
2457     s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
/* Wavefront (WPP) per-row decoding job (run via avctx->execute2): decodes
 * one CTB row using its own HEVCContext clone, synchronising with the row
 * above through ff_thread_await_progress2/report_progress2 and aborting
 * all rows via the shared atomic wpp_err flag on error. */
2460 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2462     HEVCContext *s1  = avctxt->priv_data, *s;
2463     HEVCLocalContext *lc;
2464     int ctb_size    = 1<< s1->ps.sps->log2_ctb_size;
2466     int *ctb_row_p    = input_ctb_row;
2467     int ctb_row = ctb_row_p[job];
2468     int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->ps.sps->width + ctb_size - 1) >> s1->ps.sps->log2_ctb_size);
2469     int ctb_addr_ts = s1->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2470     int thread = ctb_row % s1->threads_number;
2473     s = s1->sList[self_id];
     /* Rows after the first start at their entry-point offset within the
      * NAL payload (offset/size computed in hls_slice_data_wpp). */
2477         ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2480         ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2483     while(more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2484         int x_ctb = (ctb_addr_rs % s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2485         int y_ctb = (ctb_addr_rs / s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2487         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
         /* Wait until the row above is far enough ahead (WPP dependency). */
2489         ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
2491         if (atomic_load(&s1->wpp_err)) {
2492             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2496         ret = ff_hevc_cabac_init(s, ctb_addr_ts);
2499         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2500         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2502         if (more_data < 0) {
2509         ff_hevc_save_states(s, ctb_addr_ts);
2510         ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2511         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
         /* Row ended early but more rows remain: bitstream is broken. */
2513         if (!more_data && (x_ctb+ctb_size) < s->ps.sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2514             atomic_store(&s1->wpp_err, 1);
2515             ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2519         if ((x_ctb+ctb_size) >= s->ps.sps->width && (y_ctb+ctb_size) >= s->ps.sps->height ) {
2520             ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2521             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2524         ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2527         if(x_ctb >= s->ps.sps->width) {
2531     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
     /* Error path: poison the CTB address and signal all rows to stop. */
2535     s->tab_slice_address[ctb_addr_rs] = -1;
2536     atomic_store(&s1->wpp_err, 1);
2537     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
/* Drive wavefront-parallel slice decoding: validate the entry points,
 * clone per-thread HEVCContexts, translate entry_point_offset[] (which
 * counts pre-unescaping bytes) into offsets/sizes inside the unescaped NAL
 * payload by subtracting the skipped emulation-prevention bytes, then
 * launch one hls_decode_entry_wpp job per CTB row via execute2(). */
2541 static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal)
2543     const uint8_t *data = nal->data;
2544     int length          = nal->size;
2545     HEVCLocalContext *lc = s->HEVClc;
2546     int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2547     int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2549     int64_t startheader, cmpt = 0;
2555         return AVERROR(ENOMEM);
2558     if (s->sh.slice_ctb_addr_rs + s->sh.num_entry_point_offsets * s->ps.sps->ctb_width >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
2559         av_log(s->avctx, AV_LOG_ERROR, "WPP ctb addresses are wrong (%d %d %d %d)\n",
2560                s->sh.slice_ctb_addr_rs, s->sh.num_entry_point_offsets,
2561                s->ps.sps->ctb_width, s->ps.sps->ctb_height
2563         res = AVERROR_INVALIDDATA;
2567     ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
     /* Lazily create one HEVCContext + local context clone per thread. */
2570         for (i = 1; i < s->threads_number; i++) {
2571             s->sList[i] = av_malloc(sizeof(HEVCContext));
2572             memcpy(s->sList[i], s, sizeof(HEVCContext));
2573             s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2574             s->sList[i]->HEVClc = s->HEVClcList[i];
2578     offset = (lc->gb.index >> 3);
     /* cmpt counts emulation-prevention bytes skipped before each entry
      * point so the raw entry_point_offset values can be corrected. */
2580     for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) {
2581         if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2587     for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2588         offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2589         for (j = 0, cmpt = 0, startheader = offset
2590              + s->sh.entry_point_offset[i]; j < nal->skipped_bytes; j++) {
2591             if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2596         s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2597         s->sh.offset[i - 1] = offset;
2600     if (s->sh.num_entry_point_offsets != 0) {
2601         offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2602         if (length < offset) {
2603             av_log(s->avctx, AV_LOG_ERROR, "entry_point_offset table is corrupted\n");
2604             res = AVERROR_INVALIDDATA;
2607         s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2608         s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
     /* Re-sync the clones with the current context, preserving their own
      * local-context pointers and the row-start QP state. */
2613     for (i = 1; i < s->threads_number; i++) {
2614         s->sList[i]->HEVClc->first_qp_group = 1;
2615         s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2616         memcpy(s->sList[i], s, sizeof(HEVCContext));
2617         s->sList[i]->HEVClc = s->HEVClcList[i];
2620     atomic_store(&s->wpp_err, 0);
2621     ff_reset_entries(s->avctx);
2623     for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2628     if (s->ps.pps->entropy_coding_sync_enabled_flag)
2629         s->avctx->execute2(s->avctx, hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2631     for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
/* Attach side data derived from SEI messages to the output frame:
 * stereo 3D (frame packing), display orientation, mastering display
 * metadata, content light level, A53 closed captions and the alternative
 * transfer characteristics. Returns 0 or AVERROR(ENOMEM). */
2639 static int set_side_data(HEVCContext *s)
2641     AVFrame *out = s->ref->frame;
     /* Frame-packing arrangement types 3..5 map onto AVStereo3D. */
2643     if (s->sei.frame_packing.present &&
2644         s->sei.frame_packing.arrangement_type >= 3 &&
2645         s->sei.frame_packing.arrangement_type <= 5 &&
2646         s->sei.frame_packing.content_interpretation_type > 0 &&
2647         s->sei.frame_packing.content_interpretation_type < 3) {
2648         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2650             return AVERROR(ENOMEM);
2652         switch (s->sei.frame_packing.arrangement_type) {
2654             if (s->sei.frame_packing.quincunx_subsampling)
2655                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2657                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2660             stereo->type = AV_STEREO3D_TOPBOTTOM;
2663             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2667         if (s->sei.frame_packing.content_interpretation_type == 2)
2668             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2670         if (s->sei.frame_packing.arrangement_type == 5) {
2671             if (s->sei.frame_packing.current_frame_is_frame0_flag)
2672                 stereo->view = AV_STEREO3D_VIEW_LEFT;
2674                 stereo->view = AV_STEREO3D_VIEW_RIGHT;
     /* Display orientation SEI -> 3x3 display matrix side data. */
2678     if (s->sei.display_orientation.present &&
2679         (s->sei.display_orientation.anticlockwise_rotation ||
2680          s->sei.display_orientation.hflip || s->sei.display_orientation.vflip)) {
2681         double angle = s->sei.display_orientation.anticlockwise_rotation * 360 / (double) (1 << 16);
2682         AVFrameSideData *rotation = av_frame_new_side_data(out,
2683                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2684                                                            sizeof(int32_t) * 9);
2686             return AVERROR(ENOMEM);
2688         av_display_rotation_set((int32_t *)rotation->data, angle);
2689         av_display_matrix_flip((int32_t *)rotation->data,
2690                                s->sei.display_orientation.hflip,
2691                                s->sei.display_orientation.vflip);
2694     // Decrement the mastering display flag when IRAP frame has no_rasl_output_flag=1
2695     // so the side data persists for the entire coded video sequence.
2696     if (s->sei.mastering_display.present > 0 &&
2697         IS_IRAP(s) && s->no_rasl_output_flag) {
2698         s->sei.mastering_display.present--;
2700     if (s->sei.mastering_display.present) {
2701         // HEVC uses a g,b,r ordering, which we convert to a more natural r,g,b
2702         const int mapping[3] = {2, 0, 1};
         /* SEI primaries are in 0.00002 units, luminance in 0.0001 units. */
2703         const int chroma_den = 50000;
2704         const int luma_den = 10000;
2706         AVMasteringDisplayMetadata *metadata =
2707             av_mastering_display_metadata_create_side_data(out);
2709             return AVERROR(ENOMEM);
2711         for (i = 0; i < 3; i++) {
2712             const int j = mapping[i];
2713             metadata->display_primaries[i][0].num = s->sei.mastering_display.display_primaries[j][0];
2714             metadata->display_primaries[i][0].den = chroma_den;
2715             metadata->display_primaries[i][1].num = s->sei.mastering_display.display_primaries[j][1];
2716             metadata->display_primaries[i][1].den = chroma_den;
2718         metadata->white_point[0].num = s->sei.mastering_display.white_point[0];
2719         metadata->white_point[0].den = chroma_den;
2720         metadata->white_point[1].num = s->sei.mastering_display.white_point[1];
2721         metadata->white_point[1].den = chroma_den;
2723         metadata->max_luminance.num = s->sei.mastering_display.max_luminance;
2724         metadata->max_luminance.den = luma_den;
2725         metadata->min_luminance.num = s->sei.mastering_display.min_luminance;
2726         metadata->min_luminance.den = luma_den;
2727         metadata->has_luminance = 1;
2728         metadata->has_primaries = 1;
2730         av_log(s->avctx, AV_LOG_DEBUG, "Mastering Display Metadata:\n");
2731         av_log(s->avctx, AV_LOG_DEBUG,
2732                "r(%5.4f,%5.4f) g(%5.4f,%5.4f) b(%5.4f %5.4f) wp(%5.4f, %5.4f)\n",
2733                av_q2d(metadata->display_primaries[0][0]),
2734                av_q2d(metadata->display_primaries[0][1]),
2735                av_q2d(metadata->display_primaries[1][0]),
2736                av_q2d(metadata->display_primaries[1][1]),
2737                av_q2d(metadata->display_primaries[2][0]),
2738                av_q2d(metadata->display_primaries[2][1]),
2739                av_q2d(metadata->white_point[0]), av_q2d(metadata->white_point[1]));
2740         av_log(s->avctx, AV_LOG_DEBUG,
2741                "min_luminance=%f, max_luminance=%f\n",
2742                av_q2d(metadata->min_luminance), av_q2d(metadata->max_luminance));
2744     // Decrement the mastering display flag when IRAP frame has no_rasl_output_flag=1
2745     // so the side data persists for the entire coded video sequence.
2746     if (s->sei.content_light.present > 0 &&
2747         IS_IRAP(s) && s->no_rasl_output_flag) {
2748         s->sei.content_light.present--;
2750     if (s->sei.content_light.present) {
2751         AVContentLightMetadata *metadata =
2752             av_content_light_metadata_create_side_data(out);
2754             return AVERROR(ENOMEM);
2755         metadata->MaxCLL  = s->sei.content_light.max_content_light_level;
2756         metadata->MaxFALL = s->sei.content_light.max_pic_average_light_level;
2758         av_log(s->avctx, AV_LOG_DEBUG, "Content Light Level Metadata:\n");
2759         av_log(s->avctx, AV_LOG_DEBUG, "MaxCLL=%d, MaxFALL=%d\n",
2760                metadata->MaxCLL, metadata->MaxFALL);
     /* A53 caption payload is consumed (freed) once attached. */
2763     if (s->sei.a53_caption.a53_caption) {
2764         AVFrameSideData* sd = av_frame_new_side_data(out,
2765                                                      AV_FRAME_DATA_A53_CC,
2766                                                      s->sei.a53_caption.a53_caption_size);
2768             memcpy(sd->data, s->sei.a53_caption.a53_caption, s->sei.a53_caption.a53_caption_size);
2769         av_freep(&s->sei.a53_caption.a53_caption);
2770         s->sei.a53_caption.a53_caption_size = 0;
2771         s->avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
2774     if (s->sei.alternative_transfer.present &&
2775         av_color_transfer_name(s->sei.alternative_transfer.preferred_transfer_characteristics) &&
2776         s->sei.alternative_transfer.preferred_transfer_characteristics != AVCOL_TRC_UNSPECIFIED) {
2777         s->avctx->color_trc = out->color_trc = s->sei.alternative_transfer.preferred_transfer_characteristics;
/* Start decoding a new frame: clear the per-picture decision tables,
 * derive the RASL-output flag, allocate the new reference frame, build
 * the frame RPS, attach SEI side data, bump/output frames and release
 * the frame-thread setup barrier (when no hwaccel is active).
 * On failure the new reference frame is unreferenced again. */
2783 static int hevc_frame_start(HEVCContext *s)
2785     HEVCLocalContext *lc = s->HEVClc;
2786     int pic_size_in_ctb  = ((s->ps.sps->width  >> s->ps.sps->log2_min_cb_size) + 1) *
2787                            ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
     /* Reset per-picture maps; tab_slice_address = -1 marks "not decoded". */
2790     memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2791     memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
2792     memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2793     memset(s->is_pcm,        0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
2794     memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2797     s->first_nal_type    = s->nal_unit_type;
     /* RASL pictures after an IDR/BLA, or a CRA right after end-of-seq,
      * must not be output. */
2799     s->no_rasl_output_flag = IS_IDR(s) || IS_BLA(s) || (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos);
2801     if (s->ps.pps->tiles_enabled_flag)
2802         lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2804     ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2808     ret = ff_hevc_frame_rps(s);
2810         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2814     s->ref->frame->key_frame = IS_IRAP(s);
2816     ret = set_side_data(s);
2820     s->frame->pict_type = 3 - s->sh.slice_type;
2823         ff_hevc_bump_frame(s);
2825     av_frame_unref(s->output_frame);
2826     ret = ff_hevc_output_frame(s, s->output_frame, 0);
     /* With hwaccel the output pixel format may still change, so the
      * frame-threading setup barrier is released later. */
2830     if (!s->avctx->hwaccel)
2831         ff_thread_finish_setup(s->avctx);
2837     ff_hevc_unref_frame(s, s->ref, ~0);
/*
 * Decode a single NAL unit.  Dispatches on nal->type: parameter-set units
 * (VPS/SPS/PPS) and SEI messages are parsed into s->ps / s->sei, while all
 * VCL (coded slice) unit types parse the slice header and then decode the
 * slice data either via the hwaccel or in software.  Returns 0 on success
 * or a negative AVERROR; parse errors only propagate when AV_EF_EXPLODE is
 * requested (see the tail of the function).
 *
 * NOTE(review): this excerpt has lines elided (several case labels, braces
 * and error branches are not visible); comments below describe only what
 * the visible code demonstrates.
 */
2842 static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
2844 HEVCLocalContext *lc = s->HEVClc;
2845 GetBitContext *gb = &lc->gb;
2846 int ctb_addr_ts, ret;
/* record the type / temporal layer of the unit currently being decoded */
2849 s->nal_unit_type = nal->type;
2850 s->temporal_id = nal->temporal_id;
2852 switch (s->nal_unit_type) {
/* parameter sets are first forwarded raw to the hwaccel (if it registered
 * a decode_params() hook), then parsed into s->ps */
2854 if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2855 ret = s->avctx->hwaccel->decode_params(s->avctx,
2862 ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2867 if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2868 ret = s->avctx->hwaccel->decode_params(s->avctx,
/* apply_defdispwin chooses whether the VUI default display window is
 * applied while parsing the SPS */
2875 ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2876 s->apply_defdispwin);
2881 if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2882 ret = s->avctx->hwaccel->decode_params(s->avctx,
2889 ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2893 case HEVC_NAL_SEI_PREFIX:
2894 case HEVC_NAL_SEI_SUFFIX:
2895 if (s->avctx->hwaccel && s->avctx->hwaccel->decode_params) {
2896 ret = s->avctx->hwaccel->decode_params(s->avctx,
2903 ret = ff_hevc_decode_nal_sei(gb, s->avctx, &s->sei, &s->ps, s->nal_unit_type);
/* every VCL (coded slice) NAL unit type shares the handler below */
2907 case HEVC_NAL_TRAIL_R:
2908 case HEVC_NAL_TRAIL_N:
2909 case HEVC_NAL_TSA_N:
2910 case HEVC_NAL_TSA_R:
2911 case HEVC_NAL_STSA_N:
2912 case HEVC_NAL_STSA_R:
2913 case HEVC_NAL_BLA_W_LP:
2914 case HEVC_NAL_BLA_W_RADL:
2915 case HEVC_NAL_BLA_N_LP:
2916 case HEVC_NAL_IDR_W_RADL:
2917 case HEVC_NAL_IDR_N_LP:
2918 case HEVC_NAL_CRA_NUT:
2919 case HEVC_NAL_RADL_N:
2920 case HEVC_NAL_RADL_R:
2921 case HEVC_NAL_RASL_N:
2922 case HEVC_NAL_RASL_R:
2923 ret = hls_slice_header(s);
2927 ret = AVERROR_INVALIDDATA;
/* honor avctx->skip_frame: drop B slices, non-intra slices or non-IRAP
 * pictures depending on the requested discard level */
2933 (s->avctx->skip_frame >= AVDISCARD_BIDIR && s->sh.slice_type == HEVC_SLICE_B) ||
2934 (s->avctx->skip_frame >= AVDISCARD_NONINTRA && s->sh.slice_type != HEVC_SLICE_I) ||
2935 (s->avctx->skip_frame >= AVDISCARD_NONKEY && !IS_IRAP(s))) {
2939 if (s->sh.first_slice_in_pic_flag) {
/* max_ra == INT_MAX means "no random-access point seen yet": a CRA/BLA
 * picture re-arms RASL dropping (the intervening assignment is elided
 * from this excerpt) */
2940 if (s->max_ra == INT_MAX) {
2941 if (s->nal_unit_type == HEVC_NAL_CRA_NUT || IS_BLA(s)) {
2945 s->max_ra = INT_MIN;
/* RASL pictures at or before the recovery POC are not decodable and
 * are skipped */
2949 if ((s->nal_unit_type == HEVC_NAL_RASL_R || s->nal_unit_type == HEVC_NAL_RASL_N) &&
2950 s->poc <= s->max_ra) {
2954 if (s->nal_unit_type == HEVC_NAL_RASL_R && s->poc > s->max_ra)
2955 s->max_ra = INT_MIN;
/* first slice of a picture: set up the new frame (DPB ref, RPS, ...) */
2959 ret = hevc_frame_start(s);
2962 } else if (!s->ref) {
2963 av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
/* all VCL NALUs of one picture must carry the same NAL unit type */
2967 if (s->nal_unit_type != s->first_nal_type) {
2968 av_log(s->avctx, AV_LOG_ERROR,
2969 "Non-matching NAL types of the VCL NALUs: %d %d\n",
2970 s->first_nal_type, s->nal_unit_type);
2971 return AVERROR_INVALIDDATA;
/* P/B slices of independent slice segments need reference picture lists */
2974 if (!s->sh.dependent_slice_segment_flag &&
2975 s->sh.slice_type != HEVC_SLICE_I) {
2976 ret = ff_hevc_slice_rpl(s);
2978 av_log(s->avctx, AV_LOG_WARNING,
2979 "Error constructing the reference lists for the current slice.\n");
2984 if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2985 ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
/* hardware path receives the raw slice bytes; the software path decodes
 * it, using WPP when slice threads and entry-point offsets are available */
2990 if (s->avctx->hwaccel) {
2991 ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
2995 if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2996 ctb_addr_ts = hls_slice_data_wpp(s, nal);
2998 ctb_addr_ts = hls_slice_data(s);
/* a CTB address equal to the total CTB count means the picture is done */
2999 if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
3003 if (ctb_addr_ts < 0) {
/* end of sequence / bitstream: advance the sequence counter and re-arm
 * the RASL output gating */
3009 case HEVC_NAL_EOS_NUT:
3010 case HEVC_NAL_EOB_NUT:
3011 s->seq_decode = (s->seq_decode + 1) & 0xff;
3012 s->max_ra = INT_MAX;
3015 case HEVC_NAL_FD_NUT:
/* unhandled NAL unit types are skipped with an informational log */
3018 av_log(s->avctx, AV_LOG_INFO,
3019 "Skipping NAL unit %d\n", s->nal_unit_type);
/* error path: only fatal when the caller requested AV_EF_EXPLODE */
3024 if (s->avctx->err_recognition & AV_EF_EXPLODE)
/*
 * Decode all NAL units contained in one input packet: split the packet
 * into NALUs, then feed each one to decode_nal_unit().  Returns 0 (or the
 * CTB progress) on success, a negative AVERROR on fatal failure.
 */
3029 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
3032 int eos_at_start = 1;
/* remember whether the previous packet ended the sequence */
3035 s->last_eos = s->eos;
3039 /* split the input packet into NAL units, so we know the upper bound on the
3040 * number of slices in the frame */
3041 ret = ff_h2645_packet_split(&s->pkt, buf, length, s->avctx, s->is_nalff,
3042 s->nal_length_size, s->avctx->codec_id, 1, 0);
3044 av_log(s->avctx, AV_LOG_ERROR,
3045 "Error splitting the input into NAL units.\n");
/* pre-scan for EOS/EOB units (the handling inside this loop is elided
 * from this excerpt; see eos_at_start above) */
3049 for (i = 0; i < s->pkt.nb_nals; i++) {
3050 if (s->pkt.nals[i].type == HEVC_NAL_EOB_NUT ||
3051 s->pkt.nals[i].type == HEVC_NAL_EOS_NUT) {
3062 /* decode the NAL units */
3063 for (i = 0; i < s->pkt.nb_nals; i++) {
3064 H2645NAL *nal = &s->pkt.nals[i];
/* honor skip_frame before doing any per-unit work */
3066 if (s->avctx->skip_frame >= AVDISCARD_ALL ||
3067 (s->avctx->skip_frame >= AVDISCARD_NONREF
3068 && ff_hevc_nal_is_nonref(nal->type)))
3071 ret = decode_nal_unit(s, nal);
/* more than two frame starts overlapping in one packet is rejected */
3072 if (ret >= 0 && s->overlap > 2)
3073 ret = AVERROR_INVALIDDATA;
3075 av_log(s->avctx, AV_LOG_WARNING,
3076 "Error parsing NAL unit #%d.\n", i);
/* signal full progress so frame-threading consumers never block on an
 * incompletely decoded reference */
3082 if (s->ref && s->threads_type == FF_THREAD_FRAME)
3083 ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
/* Log a 16-byte MD5 digest as 32 lowercase hex characters (no newline). */
3088 static void print_md5(void *log_ctx, int level, uint8_t md5[16])
3091 for (i = 0; i < 16; i++)
3092 av_log(log_ctx, level, "%02"PRIx8, md5[i]);
/*
 * Verify the decoded frame against the per-plane MD5 checksums carried in
 * the SEI picture-hash message (s->sei.picture_hash).  Returns 0 when all
 * planes match, AVERROR_INVALIDDATA on a mismatch, AVERROR(EINVAL)/
 * AVERROR(ENOMEM) on setup failures.
 */
3095 static int verify_md5(HEVCContext *s, AVFrame *frame)
3097 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
3102 return AVERROR(EINVAL);
/* >8bpp formats store each sample in 16 bits */
3104 pixel_shift = desc->comp[0].depth > 8;
3106 av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
3109 /* the checksums are LE, so we have to byteswap for >8bpp formats
/* scratch line buffer for the byteswap, allocated lazily.
 * NOTE(review): it is only allocated when it does not exist yet, so it is
 * presumably sized for the largest linesize the stream will use — confirm
 * against the elided surrounding code. */
3112 if (pixel_shift && !s->checksum_buf) {
3113 av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
3114 FFMAX3(frame->linesize[0], frame->linesize[1],
3115 frame->linesize[2]));
3116 if (!s->checksum_buf)
3117 return AVERROR(ENOMEM);
/* hash every plane present in the frame */
3121 for (i = 0; frame->data[i]; i++) {
3122 int width = s->avctx->coded_width;
3123 int height = s->avctx->coded_height;
/* chroma planes (i == 1, 2) are subsampled per the pixel format */
3124 int w = (i == 1 || i == 2) ? (width >> desc->log2_chroma_w) : width;
3125 int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
3128 av_md5_init(s->md5_ctx);
3129 for (j = 0; j < h; j++) {
3130 const uint8_t *src = frame->data[i] + j * frame->linesize[i];
/* byteswap 16-bit samples into the scratch buffer before hashing
 * (the checksums are little-endian; see comment above) */
3133 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
3134 (const uint16_t *) src, w);
3135 src = s->checksum_buf;
3138 av_md5_update(s->md5_ctx, src, w << pixel_shift);
3140 av_md5_final(s->md5_ctx, md5);
3142 if (!memcmp(md5, s->sei.picture_hash.md5[i], 16)) {
3143 av_log (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
3144 print_md5(s->avctx, AV_LOG_DEBUG, md5);
3145 av_log (s->avctx, AV_LOG_DEBUG, "; ");
/* mismatch: report computed vs. expected digest and fail */
3147 av_log (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
3148 print_md5(s->avctx, AV_LOG_ERROR, md5);
3149 av_log (s->avctx, AV_LOG_ERROR, " != ");
3150 print_md5(s->avctx, AV_LOG_ERROR, s->sei.picture_hash.md5[i]);
3151 av_log (s->avctx, AV_LOG_ERROR, "\n");
3152 return AVERROR_INVALIDDATA;
3156 av_log(s->avctx, AV_LOG_DEBUG, "\n");
/*
 * Parse codec extradata (hvcC or raw Annex-B) into the parameter-set
 * context; also detects NALFF framing and the NAL length size.  When
 * 'first' is nonzero (initial extradata at init time), stream parameters
 * from the first available SPS are exported to the AVCodecContext.
 * Returns 0 on success or a negative AVERROR.
 */
3161 static int hevc_decode_extradata(HEVCContext *s, uint8_t *buf, int length, int first)
3165 ret = ff_hevc_decode_extradata(buf, length, &s->ps, &s->sei, &s->is_nalff,
3166 &s->nal_length_size, s->avctx->err_recognition,
3167 s->apply_defdispwin, s->avctx);
3171 /* export stream parameters from the first SPS */
3172 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3173 if (first && s->ps.sps_list[i]) {
3174 const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
3175 export_stream_params(s->avctx, &s->ps, sps);
/*
 * AVCodec.decode entry point: decode one AVPacket into at most one output
 * frame.  Handles mid-stream extradata updates, hwaccel frame completion
 * and optional SEI MD5 verification before handing out the frame.
 */
3183 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
3187 int new_extradata_size;
3188 uint8_t *new_extradata;
3189 HEVCContext *s = avctx->priv_data;
/* NOTE(review): presumably the draining path (empty packet) — the guard
 * is elided from this excerpt; flush any pending frame from the DPB */
3192 ret = ff_hevc_output_frame(s, data, 1);
/* apply new extradata attached as packet side data (e.g. stream switch) */
3200 new_extradata = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA,
3201 &new_extradata_size);
3202 if (new_extradata && new_extradata_size > 0) {
3203 ret = hevc_decode_extradata(s, new_extradata, new_extradata_size, 0);
3209 ret = decode_nal_units(s, avpkt->data, avpkt->size);
/* for hwaccel, finalize the picture; drop the ref on failure */
3213 if (avctx->hwaccel) {
3214 if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) {
3215 av_log(avctx, AV_LOG_ERROR,
3216 "hardware accelerator failed to decode picture\n");
3217 ff_hevc_unref_frame(s, s->ref, ~0);
3221 /* verify the SEI checksum */
3222 if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
3223 s->sei.picture_hash.is_md5) {
3224 ret = verify_md5(s, s->ref->frame);
3225 if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
3226 ff_hevc_unref_frame(s, s->ref, ~0);
/* the hash only applies to this picture; clear it for the next one */
3231 s->sei.picture_hash.is_md5 = 0;
3233 if (s->is_decoded) {
3234 av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
/* hand a buffered output frame to the caller, if one is ready */
3238 if (s->output_frame->buf[0]) {
3239 av_frame_move_ref(data, s->output_frame);
/*
 * Make dst a new reference to src: ref the thread frame and every backing
 * buffer (motion vectors, RPL tables, hwaccel private data) and copy the
 * scalar per-frame state.  On any allocation failure dst is fully unrefed
 * and AVERROR(ENOMEM) is returned (the fail: label is elided from this
 * excerpt).
 */
3246 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
3250 ret = ff_thread_ref_frame(&dst->tf, &src->tf);
3254 dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
3255 if (!dst->tab_mvf_buf)
3257 dst->tab_mvf = src->tab_mvf;
3259 dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
3260 if (!dst->rpl_tab_buf)
3262 dst->rpl_tab = src->rpl_tab;
3264 dst->rpl_buf = av_buffer_ref(src->rpl_buf);
/* plain per-frame scalars are copied by value */
3268 dst->poc = src->poc;
3269 dst->ctb_count = src->ctb_count;
3270 dst->flags = src->flags;
3271 dst->sequence = src->sequence;
/* hwaccel private data, if any, travels with the frame */
3273 if (src->hwaccel_picture_private) {
3274 dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
3275 if (!dst->hwaccel_priv_buf)
3277 dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
/* error path: release everything referenced so far */
3282 ff_hevc_unref_frame(s, dst, ~0);
3283 return AVERROR(ENOMEM);
/*
 * AVCodec.close: free everything owned by the HEVCContext — scratch
 * buffers, the DPB, parameter sets, slice-header arrays, per-thread local
 * contexts and the split-packet state.  Safe to call on a partially
 * initialized context (used as the error path of hevc_init_context()).
 */
3286 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3288 HEVCContext *s = avctx->priv_data;
3293 av_freep(&s->md5_ctx);
3295 av_freep(&s->cabac_state);
/* SAO scratch buffers, one pair per plane */
3297 for (i = 0; i < 3; i++) {
3298 av_freep(&s->sao_pixel_buffer_h[i]);
3299 av_freep(&s->sao_pixel_buffer_v[i]);
3301 av_frame_free(&s->output_frame);
/* release every DPB slot: drop references first, then the AVFrame */
3303 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3304 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3305 av_frame_free(&s->DPB[i].frame);
3308 ff_hevc_ps_uninit(&s->ps);
3310 av_freep(&s->sh.entry_point_offset);
3311 av_freep(&s->sh.offset);
3312 av_freep(&s->sh.size);
/* per-thread local contexts; slot 0 is handled separately below because
 * it may alias s->HEVClc */
3314 for (i = 1; i < s->threads_number; i++) {
3315 HEVCLocalContext *lc = s->HEVClcList[i];
3317 av_freep(&s->HEVClcList[i]);
3318 av_freep(&s->sList[i]);
3321 if (s->HEVClc == s->HEVClcList[0])
3323 av_freep(&s->HEVClcList[0]);
3325 ff_h2645_packet_uninit(&s->pkt);
/*
 * Allocate the per-context state shared by init and thread copies: the
 * primary local context, CABAC state, output frame, DPB frames, MD5
 * context and bswap DSP.  On any allocation failure everything is torn
 * down via hevc_decode_free() and AVERROR(ENOMEM) is returned (the fail:
 * label is elided from this excerpt).
 */
3330 static av_cold int hevc_init_context(AVCodecContext *avctx)
3332 HEVCContext *s = avctx->priv_data;
3337 s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
/* the main thread's local context doubles as list slot 0 */
3340 s->HEVClcList[0] = s->HEVClc;
3343 s->cabac_state = av_malloc(HEVC_CONTEXTS);
3344 if (!s->cabac_state)
3347 s->output_frame = av_frame_alloc();
3348 if (!s->output_frame)
/* pre-allocate an AVFrame for every DPB slot */
3351 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3352 s->DPB[i].frame = av_frame_alloc();
3353 if (!s->DPB[i].frame)
3355 s->DPB[i].tf.f = s->DPB[i].frame;
/* INT_MAX == "no random-access point seen yet" (see decode_nal_unit) */
3358 s->max_ra = INT_MAX;
3360 s->md5_ctx = av_md5_alloc();
3364 ff_bswapdsp_init(&s->bdsp);
3366 s->context_initialized = 1;
3369 ff_hevc_reset_sei(&s->sei);
/* error path: hevc_decode_free() copes with partial initialization */
3374 hevc_decode_free(avctx);
3375 return AVERROR(ENOMEM);
/*
 * Frame-threading: synchronize the worker context 'dst' with the main
 * context 'src' before decoding the next frame — re-reference the DPB,
 * copy the parameter-set lists and the scalar decoding state, and carry
 * over persistent SEI.  Returns 0 on success or a negative AVERROR.
 */
3379 static int hevc_update_thread_context(AVCodecContext *dst,
3380 const AVCodecContext *src)
3382 HEVCContext *s = dst->priv_data;
3383 HEVCContext *s0 = src->priv_data;
/* lazily initialize thread copies on first use */
3386 if (!s->context_initialized) {
3387 ret = hevc_init_context(dst);
/* mirror the source DPB: drop our old refs, re-reference live frames */
3392 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3393 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3394 if (s0->DPB[i].frame->buf[0]) {
3395 ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
/* NOTE(review): the statement guarded here is elided from this excerpt;
 * the active SPS changed, so SPS-derived state presumably needs a reset */
3401 if (s->ps.sps != s0->ps.sps)
/* re-reference all three parameter-set lists from the source context */
3403 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
3404 av_buffer_unref(&s->ps.vps_list[i]);
3405 if (s0->ps.vps_list[i]) {
3406 s->ps.vps_list[i] = av_buffer_ref(s0->ps.vps_list[i]);
3407 if (!s->ps.vps_list[i])
3408 return AVERROR(ENOMEM);
3412 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3413 av_buffer_unref(&s->ps.sps_list[i]);
3414 if (s0->ps.sps_list[i]) {
3415 s->ps.sps_list[i] = av_buffer_ref(s0->ps.sps_list[i]);
3416 if (!s->ps.sps_list[i])
3417 return AVERROR(ENOMEM);
3421 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
3422 av_buffer_unref(&s->ps.pps_list[i]);
3423 if (s0->ps.pps_list[i]) {
3424 s->ps.pps_list[i] = av_buffer_ref(s0->ps.pps_list[i]);
3425 if (!s->ps.pps_list[i])
3426 return AVERROR(ENOMEM);
/* adopt the source's active SPS (reallocates picture arrays etc.) */
3430 if (s->ps.sps != s0->ps.sps)
3431 if ((ret = set_sps(s, s0->ps.sps, src->pix_fmt)) < 0)
/* scalar decoding state copied by value */
3434 s->seq_decode = s0->seq_decode;
3435 s->seq_output = s0->seq_output;
3436 s->pocTid0 = s0->pocTid0;
3437 s->max_ra = s0->max_ra;
3439 s->no_rasl_output_flag = s0->no_rasl_output_flag;
3441 s->is_nalff = s0->is_nalff;
3442 s->nal_length_size = s0->nal_length_size;
3444 s->threads_number = s0->threads_number;
3445 s->threads_type = s0->threads_type;
/* NOTE(review): guard elided in this excerpt — presumably taken when the
 * source context hit EOS, starting a new sequence on this thread */
3448 s->seq_decode = (s->seq_decode + 1) & 0xff;
3449 s->max_ra = INT_MAX;
/* persistent SEI survives across frames and threads */
3452 s->sei.frame_packing = s0->sei.frame_packing;
3453 s->sei.display_orientation = s0->sei.display_orientation;
3454 s->sei.mastering_display = s0->sei.mastering_display;
3455 s->sei.content_light = s0->sei.content_light;
3456 s->sei.alternative_transfer = s0->sei.alternative_transfer;
/*
 * AVCodec.init: allocate the context, pick the threading mode and parse
 * any container-provided extradata.  Returns 0 or a negative AVERROR.
 */
3462 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3464 HEVCContext *s = avctx->priv_data;
/* frame threading needs per-frame progress reporting */
3467 avctx->internal->allocate_progress = 1;
3469 ret = hevc_init_context(avctx);
3473 s->enable_parallel_tiles = 0;
3474 s->sei.picture_timing.picture_struct = 0;
3477 atomic_init(&s->wpp_err, 0);
/* slice threading uses avctx->thread_count workers, otherwise run single */
3479 if(avctx->active_thread_type & FF_THREAD_SLICE)
3480 s->threads_number = avctx->thread_count;
3482 s->threads_number = 1;
/* initial extradata: first=1 also exports stream params from the SPS */
3484 if (avctx->extradata_size > 0 && avctx->extradata) {
3485 ret = hevc_decode_extradata(s, avctx->extradata, avctx->extradata_size, 1);
3487 hevc_decode_free(avctx);
/* frame threading takes precedence when enabled with >1 threads */
3492 if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3493 s->threads_type = FF_THREAD_FRAME;
3495 s->threads_type = FF_THREAD_SLICE;
/*
 * Frame-threading worker init: start from a zeroed context, then allocate
 * the per-context state (the rest is synced by hevc_update_thread_context).
 */
3501 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3503 HEVCContext *s = avctx->priv_data;
3506 memset(s, 0, sizeof(*s));
3508 ret = hevc_init_context(avctx);
/*
 * AVCodec.flush (e.g. on seek): empty the DPB and re-arm the RASL gating
 * so decoding restarts cleanly at the next random-access point.
 */
3516 static void hevc_decode_flush(AVCodecContext *avctx)
3518 HEVCContext *s = avctx->priv_data;
3519 ff_hevc_flush_dpb(s);
3520 s->max_ra = INT_MAX;
/* AVOption helpers: field offset into HEVCContext and common option flags */
3524 #define OFFSET(x) offsetof(HEVCContext, x)
3525 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
/* decoder private options; note both names map onto the same
 * apply_defdispwin field */
3527 static const AVOption options[] = {
3528 { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3529 AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3530 { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3531 AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
/* AVClass exposing the options table above through the AVOption API */
3535 static const AVClass hevc_decoder_class = {
3536 .class_name = "HEVC decoder",
3537 .item_name = av_default_item_name,
3539 .version = LIBAVUTIL_VERSION_INT,
3542 AVCodec ff_hevc_decoder = {
3544 .long_name = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3545 .type = AVMEDIA_TYPE_VIDEO,
3546 .id = AV_CODEC_ID_HEVC,
3547 .priv_data_size = sizeof(HEVCContext),
3548 .priv_class = &hevc_decoder_class,
3549 .init = hevc_decode_init,
3550 .close = hevc_decode_free,
3551 .decode = hevc_decode_frame,
3552 .flush = hevc_decode_flush,
3553 .update_thread_context = ONLY_IF_THREADS_ENABLED(hevc_update_thread_context),
3554 .init_thread_copy = ONLY_IF_THREADS_ENABLED(hevc_init_thread_copy),
3555 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
3556 AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
3557 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_EXPORTS_CROPPING,
3558 .profiles = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
3559 .hw_configs = (const AVCodecHWConfigInternal*[]) {
3560 #if CONFIG_HEVC_DXVA2_HWACCEL
3561 HWACCEL_DXVA2(hevc),
3563 #if CONFIG_HEVC_D3D11VA_HWACCEL
3564 HWACCEL_D3D11VA(hevc),
3566 #if CONFIG_HEVC_D3D11VA2_HWACCEL
3567 HWACCEL_D3D11VA2(hevc),
3569 #if CONFIG_HEVC_NVDEC_HWACCEL
3570 HWACCEL_NVDEC(hevc),
3572 #if CONFIG_HEVC_VAAPI_HWACCEL
3573 HWACCEL_VAAPI(hevc),
3575 #if CONFIG_HEVC_VDPAU_HWACCEL
3576 HWACCEL_VDPAU(hevc),
3578 #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
3579 HWACCEL_VIDEOTOOLBOX(hevc),