4 * Copyright (C) 2012 - 2013 Guillaume Martres
5 * Copyright (C) 2012 - 2013 Mickael Raulet
6 * Copyright (C) 2012 - 2013 Gildas Cocherel
7 * Copyright (C) 2012 - 2013 Wassim Hamidouche
9 * This file is part of FFmpeg.
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/attributes.h"
27 #include "libavutil/common.h"
28 #include "libavutil/display.h"
29 #include "libavutil/internal.h"
30 #include "libavutil/mastering_display_metadata.h"
31 #include "libavutil/md5.h"
32 #include "libavutil/opt.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/stereo3d.h"
37 #include "bytestream.h"
38 #include "cabac_functions.h"
41 #include "hevc_data.h"
/* Maps a prediction-unit width in luma samples to the index used to select
 * the matching weighted-prediction DSP function; only the listed widths are
 * valid keys, all other entries stay 0. */
45 const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
48 * NOTE: Each function hls_foo corresponds to the function foo in the
49 * specification (HLS stands for High Level Syntax).
56 /* free everything allocated by pic_arrays_init() */
57 static void pic_arrays_free(HEVCContext *s)
/* per-CTB SAO and deblocking-filter parameters */
60     av_freep(&s->deblock);
/* per-minimum-CB coding-tree metadata */
62     av_freep(&s->skip_flag);
63     av_freep(&s->tab_ct_depth);
/* per-PU intra modes and per-TB luma coded-block flags */
65     av_freep(&s->tab_ipm);
66     av_freep(&s->cbf_luma);
/* per-CTB QP map and slice bookkeeping tables */
69     av_freep(&s->qp_y_tab);
70     av_freep(&s->tab_slice_address);
71     av_freep(&s->filter_slice_edges);
/* deblocking boundary-strength grids */
73     av_freep(&s->horizontal_bs);
74     av_freep(&s->vertical_bs);
/* slice-header entry-point arrays (tiles / WPP) */
76     av_freep(&s->sh.entry_point_offset);
77     av_freep(&s->sh.size);
78     av_freep(&s->sh.offset);
/* buffer pools for tables that are kept per reference frame */
80     av_buffer_pool_uninit(&s->tab_mvf_pool);
81     av_buffer_pool_uninit(&s->rpl_tab_pool);
84 /* allocate arrays that depend on frame dimensions */
85 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
87     int log2_min_cb_size = sps->log2_min_cb_size;
88     int width             = sps->width;
89     int height            = sps->height;
/* +1 per dimension so neighbour lookups at the right/bottom picture
 * border stay in bounds */
90     int pic_size_in_ctb = ((width >> log2_min_cb_size) + 1) *
91                            ((height >> log2_min_cb_size) + 1);
92     int ctb_count = sps->ctb_width * sps->ctb_height;
93     int min_pu_size = sps->min_pu_width * sps->min_pu_height;
/* boundary-strength grid has 4-sample granularity */
95     s->bs_width = (width >> 2) + 1;
96     s->bs_height = (height >> 2) + 1;
98     s->sao = av_mallocz_array(ctb_count, sizeof(*s->sao));
99     s->deblock = av_mallocz_array(ctb_count, sizeof(*s->deblock));
100     if (!s->sao || !s->deblock)
103     s->skip_flag = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
104     s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
105     if (!s->skip_flag || !s->tab_ct_depth)
108     s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
109     s->tab_ipm = av_mallocz(min_pu_size)	;
110     s->is_pcm = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
111     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
114     s->filter_slice_edges = av_mallocz(ctb_count);
115     s->tab_slice_address = av_malloc_array(pic_size_in_ctb,
116                                            sizeof(*s->tab_slice_address));
117     s->qp_y_tab = av_malloc_array(pic_size_in_ctb,
118                                   sizeof(*s->qp_y_tab));
119     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
122     s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
123     s->vertical_bs = av_mallocz_array(s->bs_width, s->bs_height);
124     if (!s->horizontal_bs || !s->vertical_bs)
/* pools: motion-vector fields and reference-picture-list tabs are
 * attached to each decoded frame, hence pooled rather than single */
127     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
129     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
131     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
/* NOTE(review): failure path; presumably reached via a fail: label that
 * frees the partial allocations — label not visible in this view */
138     return AVERROR(ENOMEM);
/* Parse the pred_weight_table() slice-header syntax: explicit weighted
 * prediction weights and offsets for both reference lists. Out-of-range
 * luma_log2_weight_denom is reported and then clamped to [0,7] so parsing
 * can continue. Chroma syntax elements are only present when
 * chroma_format_idc != 0. */
141 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
/* per-reference presence flags; HEVC allows at most 16 refs per list */
145     uint8_t luma_weight_l0_flag[16];
146     uint8_t chroma_weight_l0_flag[16];
147     uint8_t luma_weight_l1_flag[16];
148     uint8_t chroma_weight_l1_flag[16];
149     int luma_log2_weight_denom;
151     luma_log2_weight_denom = get_ue_golomb_long(gb);
152     if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7)
153         av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
/* clamp so a corrupt denom cannot produce undefined shifts below */
154     s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
155     if (s->ps.sps->chroma_format_idc != 0) {
156         int delta = get_se_golomb(gb);
157         s->sh.chroma_log2_weight_denom = av_clip_uintp2(s->sh.luma_log2_weight_denom + delta, 3);
/* list L0: read presence flags; absent weights default to 1.0 (in fixed
 * point, 1 << denom) with zero offset */
160     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
161         luma_weight_l0_flag[i] = get_bits1(gb);
162         if (!luma_weight_l0_flag[i]) {
163             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
164             s->sh.luma_offset_l0[i] = 0;
167     if (s->ps.sps->chroma_format_idc != 0) {
168         for (i = 0; i < s->sh.nb_refs[L0]; i++)
169             chroma_weight_l0_flag[i] = get_bits1(gb);
171         for (i = 0; i < s->sh.nb_refs[L0]; i++)
172             chroma_weight_l0_flag[i] = 0;
174     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
175         if (luma_weight_l0_flag[i]) {
176             int delta_luma_weight_l0 = get_se_golomb(gb);
177             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
178             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
180         if (chroma_weight_l0_flag[i]) {
/* j == 0: Cb, j == 1: Cr */
181             for (j = 0; j < 2; j++) {
182                 int delta_chroma_weight_l0 = get_se_golomb(gb);
183                 int delta_chroma_offset_l0 = get_se_golomb(gb);
184                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
185                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
186                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
189             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
190             s->sh.chroma_offset_l0[i][0] = 0;
191             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
192             s->sh.chroma_offset_l0[i][1] = 0;
/* list L1 is only signalled for B slices; same layout as L0 above */
195     if (s->sh.slice_type == B_SLICE) {
196         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
197             luma_weight_l1_flag[i] = get_bits1(gb);
198             if (!luma_weight_l1_flag[i]) {
199                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
200                 s->sh.luma_offset_l1[i] = 0;
203         if (s->ps.sps->chroma_format_idc != 0) {
204             for (i = 0; i < s->sh.nb_refs[L1]; i++)
205                 chroma_weight_l1_flag[i] = get_bits1(gb);
207             for (i = 0; i < s->sh.nb_refs[L1]; i++)
208                 chroma_weight_l1_flag[i] = 0;
210         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
211             if (luma_weight_l1_flag[i]) {
212                 int delta_luma_weight_l1 = get_se_golomb(gb);
213                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
214                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
216             if (chroma_weight_l1_flag[i]) {
217                 for (j = 0; j < 2; j++) {
218                     int delta_chroma_weight_l1 = get_se_golomb(gb);
219                     int delta_chroma_offset_l1 = get_se_golomb(gb);
220                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
221                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
222                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
225                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
226                 s->sh.chroma_offset_l1[i][0] = 0;
227                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
228                 s->sh.chroma_offset_l1[i][1] = 0;
/* Parse the long-term reference picture set of the slice header into *rps.
 * The set is the concatenation of nb_sps entries taken from the SPS tables
 * and nb_sh entries coded directly in the slice header.
 * Returns 0 on success, AVERROR_INVALIDDATA if the counts overflow rps->poc. */
234 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
236     const HEVCSPS *sps = s->ps.sps;
237     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
238     int prev_delta_msb = 0;
239     unsigned int nb_sps = 0, nb_sh;
243     if (!sps->long_term_ref_pics_present_flag)
246     if (sps->num_long_term_ref_pics_sps > 0)
247         nb_sps = get_ue_golomb_long(gb);
248     nb_sh = get_ue_golomb_long(gb);
/* 64-bit sum so two large 32-bit counts cannot wrap past the bound check */
250     if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
251         return AVERROR_INVALIDDATA;
253     rps->nb_refs = nb_sh + nb_sps;
255     for (i = 0; i < rps->nb_refs; i++) {
256         uint8_t delta_poc_msb_present;
259         uint8_t lt_idx_sps = 0;
/* first nb_sps entries: indices into the SPS long-term tables */
261             if (sps->num_long_term_ref_pics_sps > 1)
262                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
264             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
265             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
/* remaining entries: POC LSB and usage flag coded in the slice header */
267             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
268             rps->used[i] = get_bits1(gb);
271         delta_poc_msb_present = get_bits1(gb);
272         if (delta_poc_msb_present) {
273             int delta = get_ue_golomb_long(gb);
/* deltas are coded differentially within each of the two sub-lists;
 * the accumulator resets at the SPS/slice-header boundary (i == nb_sps) */
275             if (i && i != nb_sps)
276                 delta += prev_delta_msb;
278             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
279             prev_delta_msb = delta;
/* Copy stream-level parameters from the active SPS/VPS into the
 * AVCodecContext so they are visible to the API user: dimensions, profile,
 * level, SAR, color properties and frame rate. */
286 static void export_stream_params(AVCodecContext *avctx, const HEVCParamSets *ps,
289     const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
290     unsigned int num = 0, den = 0;
292     avctx->pix_fmt         = sps->pix_fmt;
293     avctx->coded_width     = sps->width;
294     avctx->coded_height    = sps->height;
/* output size excludes the conformance-window cropping */
295     avctx->width           = sps->output_width;
296     avctx->height          = sps->output_height;
297     avctx->has_b_frames    = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
298     avctx->profile         = sps->ptl.general_ptl.profile_idc;
299     avctx->level           = sps->ptl.general_ptl.level_idc;
301     ff_set_sar(avctx, sps->vui.sar);
303     if (sps->vui.video_signal_type_present_flag)
304         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
/* default to limited range when the VUI does not say otherwise */
307         avctx->color_range = AVCOL_RANGE_MPEG;
309     if (sps->vui.colour_description_present_flag) {
310         avctx->color_primaries = sps->vui.colour_primaries;
311         avctx->color_trc       = sps->vui.transfer_characteristic;
312         avctx->colorspace      = sps->vui.matrix_coeffs;
314         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
315         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
316         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
/* VPS timing info takes precedence over VUI timing info */
319     if (vps->vps_timing_info_present_flag) {
320         num = vps->vps_num_units_in_tick;
321         den = vps->vps_time_scale;
322     } else if (sps->vui.vui_timing_info_present_flag) {
323         num = sps->vui.vui_num_units_in_tick;
324         den = sps->vui.vui_time_scale;
/* num_units_in_tick/time_scale is a frame *duration*, so num and den are
 * swapped when stored as a frame rate */
327     if (num != 0 && den != 0)
328         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
/* Activate an SPS: (re)allocate the picture-size-dependent arrays, export
 * stream parameters, negotiate the output pixel format (offering hwaccel
 * formats matching the bit depth), initialize the DSP contexts and the SAO
 * edge buffers. pix_fmt == AV_PIX_FMT_NONE means "negotiate via
 * ff_thread_get_format", otherwise the given format is used as-is. */
332 static int set_sps(HEVCContext *s, const HEVCSPS *sps, enum AVPixelFormat pix_fmt)
334 #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL + CONFIG_HEVC_VAAPI_HWACCEL + CONFIG_HEVC_VDPAU_HWACCEL)
/* +2: one slot for the software format, one for the terminator */
335     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
345     ret = pic_arrays_init(s, sps);
349     export_stream_params(s->avctx, &s->ps, sps);
/* build the candidate list of hwaccel formats for this chroma/bit depth */
351     switch (sps->pix_fmt) {
352     case AV_PIX_FMT_YUV420P:
353     case AV_PIX_FMT_YUVJ420P:
354 #if CONFIG_HEVC_DXVA2_HWACCEL
355         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
357 #if CONFIG_HEVC_D3D11VA_HWACCEL
358         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
360 #if CONFIG_HEVC_VAAPI_HWACCEL
361         *fmt++ = AV_PIX_FMT_VAAPI;
363 #if CONFIG_HEVC_VDPAU_HWACCEL
364         *fmt++ = AV_PIX_FMT_VDPAU;
367     case AV_PIX_FMT_YUV420P10:
368 #if CONFIG_HEVC_DXVA2_HWACCEL
369         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
371 #if CONFIG_HEVC_D3D11VA_HWACCEL
372         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
374 #if CONFIG_HEVC_VAAPI_HWACCEL
375         *fmt++ = AV_PIX_FMT_VAAPI;
380     if (pix_fmt == AV_PIX_FMT_NONE) {
/* terminate the list with the software format and let the user/threading
 * layer pick */
381         *fmt++ = sps->pix_fmt;
382         *fmt = AV_PIX_FMT_NONE;
384         ret = ff_thread_get_format(s->avctx, pix_fmts);
387         s->avctx->pix_fmt = ret;
390         s->avctx->pix_fmt = pix_fmt;
/* bit-depth-dependent function pointers */
393     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
394     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
395     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
397     for (i = 0; i < 3; i++) {
398         av_freep(&s->sao_pixel_buffer_h[i]);
399         av_freep(&s->sao_pixel_buffer_v[i]);
/* SAO needs copies of pre-filter edge pixels; not needed with hwaccel */
402     if (sps->sao_enabled && !s->avctx->hwaccel) {
403         int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
406         for(c_idx = 0; c_idx < c_count; c_idx++) {
407             int w = sps->width >> sps->hshift[c_idx];
408             int h = sps->height >> sps->vshift[c_idx];
409             s->sao_pixel_buffer_h[c_idx] =
410                 av_malloc((w * 2 * sps->ctb_height) <<
412             s->sao_pixel_buffer_v[c_idx] =
413                 av_malloc((h * 2 * sps->ctb_width) <<
419     s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
/* Parse the slice segment header (spec 7.3.6) into s->sh, activating the
 * referenced PPS/SPS as a side effect. Returns 0 on success or a negative
 * AVERROR code on invalid bitstream data. */
429 static int hls_slice_header(HEVCContext *s)
431     GetBitContext *gb = &s->HEVClc->gb;
432     SliceHeader *sh   = &s->sh;
436     sh->first_slice_in_pic_flag = get_bits1(gb);
/* a new IDR/BLA picture starts a new coded video sequence */
437     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
438         s->seq_decode = (s->seq_decode + 1) & 0xff;
441         ff_hevc_clear_refs(s);
443     sh->no_output_of_prior_pics_flag = 0;
445         sh->no_output_of_prior_pics_flag = get_bits1(gb);
447     sh->pps_id = get_ue_golomb_long(gb);
448     if (sh->pps_id >= HEVC_MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
449         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
450         return AVERROR_INVALIDDATA;
/* all slices of one picture must use the same PPS */
452     if (!sh->first_slice_in_pic_flag &&
453         s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
454         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
455         return AVERROR_INVALIDDATA;
457     s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
458     if (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos == 1)
459         sh->no_output_of_prior_pics_flag = 1;
/* SPS switch: re-run set_sps() and start a new sequence */
461     if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
462         const HEVCSPS* last_sps = s->ps.sps;
463         s->ps.sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
464         if (last_sps && IS_IRAP(s) && s->nal_unit_type != HEVC_NAL_CRA_NUT) {
465             if (s->ps.sps->width !=  last_sps->width || s->ps.sps->height != last_sps->height ||
466                 s->ps.sps->temporal_layer[s->ps.sps->max_sub_layers - 1].max_dec_pic_buffering !=
467                 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
468                 sh->no_output_of_prior_pics_flag = 0;
470         ff_hevc_clear_refs(s);
471         ret = set_sps(s, s->ps.sps, AV_PIX_FMT_NONE);
475         s->seq_decode = (s->seq_decode + 1) & 0xff;
479     sh->dependent_slice_segment_flag = 0;
480     if (!sh->first_slice_in_pic_flag) {
481         int slice_address_length;
483         if (s->ps.pps->dependent_slice_segments_enabled_flag)
484             sh->dependent_slice_segment_flag = get_bits1(gb);
/* slice address is coded with just enough bits to cover the CTB count */
486         slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
487                                             s->ps.sps->ctb_height);
488         sh->slice_segment_addr = get_bitsz(gb, slice_address_length);
489         if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
490             av_log(s->avctx, AV_LOG_ERROR,
491                    "Invalid slice segment address: %u.\n",
492                    sh->slice_segment_addr);
493             return AVERROR_INVALIDDATA;
496         if (!sh->dependent_slice_segment_flag) {
497             sh->slice_addr = sh->slice_segment_addr;
501         sh->slice_segment_addr = sh->slice_addr = 0;
503         s->slice_initialized = 0;
/* independent slice segment: full header follows; dependent segments
 * inherit everything below from the previous independent one */
506     if (!sh->dependent_slice_segment_flag) {
507         s->slice_initialized = 0;
509         for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
510             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
512         sh->slice_type = get_ue_golomb_long(gb);
513         if (!(sh->slice_type == I_SLICE ||
514               sh->slice_type == P_SLICE ||
515               sh->slice_type == B_SLICE)) {
516             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
518             return AVERROR_INVALIDDATA;
520         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
521             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
522             return AVERROR_INVALIDDATA;
525         // when flag is not present, picture is inferred to be output
526         sh->pic_output_flag = 1;
527         if (s->ps.pps->output_flag_present_flag)
528             sh->pic_output_flag = get_bits1(gb);
530         if (s->ps.sps->separate_colour_plane_flag)
531             sh->colour_plane_id = get_bits(gb, 2);
536             sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
537             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
/* all slices of a picture must share the same POC; tolerate unless the
 * caller asked for strict error handling */
538             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
539                 av_log(s->avctx, AV_LOG_WARNING,
540                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
541                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
542                     return AVERROR_INVALIDDATA;
/* short-term RPS: either coded inline or selected from the SPS sets */
547             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
548             pos = get_bits_left(gb);
549             if (!sh->short_term_ref_pic_set_sps_flag) {
550                 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
554                 sh->short_term_rps = &sh->slice_rps;
556                 int numbits, rps_idx;
558                 if (!s->ps.sps->nb_st_rps) {
559                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
560                     return AVERROR_INVALIDDATA;
563                 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
564                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
565                 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
/* record RPS sizes in bits for hwaccels that re-parse the header */
567             sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
569             pos = get_bits_left(gb);
570             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
572                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
573                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
574                     return AVERROR_INVALIDDATA;
576             sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
578             if (s->ps.sps->sps_temporal_mvp_enabled_flag)
579                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
581                 sh->slice_temporal_mvp_enabled_flag = 0;
583             s->sh.short_term_rps = NULL;
/* sub-layer non-reference NAL types do not advance the output POC */
588         if (s->temporal_id == 0 &&
589             s->nal_unit_type != HEVC_NAL_TRAIL_N &&
590             s->nal_unit_type != HEVC_NAL_TSA_N   &&
591             s->nal_unit_type != HEVC_NAL_STSA_N  &&
592             s->nal_unit_type != HEVC_NAL_RADL_N  &&
593             s->nal_unit_type != HEVC_NAL_RADL_R  &&
594             s->nal_unit_type != HEVC_NAL_RASL_N  &&
595             s->nal_unit_type != HEVC_NAL_RASL_R)
/* SAO on/off per component; chroma shares one flag for Cb and Cr */
598         if (s->ps.sps->sao_enabled) {
599             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
600             if (s->ps.sps->chroma_format_idc) {
601                 sh->slice_sample_adaptive_offset_flag[1] =
602                 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
605             sh->slice_sample_adaptive_offset_flag[0] = 0;
606             sh->slice_sample_adaptive_offset_flag[1] = 0;
607             sh->slice_sample_adaptive_offset_flag[2] = 0;
/* active reference counts: PPS defaults, optionally overridden here */
610         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
611         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
614             sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
615             if (sh->slice_type == B_SLICE)
616                 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
618             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
619                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
620                 if (sh->slice_type == B_SLICE)
621                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
623             if (sh->nb_refs[L0] > HEVC_MAX_REFS || sh->nb_refs[L1] > HEVC_MAX_REFS) {
624                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
625                        sh->nb_refs[L0], sh->nb_refs[L1]);
626                 return AVERROR_INVALIDDATA;
629             sh->rpl_modification_flag[0] = 0;
630             sh->rpl_modification_flag[1] = 0;
631             nb_refs = ff_hevc_frame_nb_refs(s);
633                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
634                 return AVERROR_INVALIDDATA;
/* optional explicit reordering of the reference picture lists */
637             if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
638                 sh->rpl_modification_flag[0] = get_bits1(gb);
639                 if (sh->rpl_modification_flag[0]) {
640                     for (i = 0; i < sh->nb_refs[L0]; i++)
641                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
644                 if (sh->slice_type == B_SLICE) {
645                     sh->rpl_modification_flag[1] = get_bits1(gb);
646                     if (sh->rpl_modification_flag[1] == 1)
647                         for (i = 0; i < sh->nb_refs[L1]; i++)
648                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
652             if (sh->slice_type == B_SLICE)
653                 sh->mvd_l1_zero_flag = get_bits1(gb);
655             if (s->ps.pps->cabac_init_present_flag)
656                 sh->cabac_init_flag = get_bits1(gb);
658                 sh->cabac_init_flag = 0;
/* collocated picture for temporal MV prediction */
660             sh->collocated_ref_idx = 0;
661             if (sh->slice_temporal_mvp_enabled_flag) {
662                 sh->collocated_list = L0;
663                 if (sh->slice_type == B_SLICE)
664                     sh->collocated_list = !get_bits1(gb);
666                 if (sh->nb_refs[sh->collocated_list] > 1) {
667                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
668                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
669                         av_log(s->avctx, AV_LOG_ERROR,
670                                "Invalid collocated_ref_idx: %d.\n",
671                                sh->collocated_ref_idx);
672                         return AVERROR_INVALIDDATA;
677             if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
678                 (s->ps.pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
679                 pred_weight_table(s, gb);
/* five_minus_max_num_merge_cand: valid result range is 1..5 */
682             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
683             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
684                 av_log(s->avctx, AV_LOG_ERROR,
685                        "Invalid number of merging MVP candidates: %d.\n",
686                        sh->max_num_merge_cand);
687                 return AVERROR_INVALIDDATA;
691         sh->slice_qp_delta = get_se_golomb(gb);
693         if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
694             sh->slice_cb_qp_offset = get_se_golomb(gb);
695             sh->slice_cr_qp_offset = get_se_golomb(gb);
697             sh->slice_cb_qp_offset = 0;
698             sh->slice_cr_qp_offset = 0;
701         if (s->ps.pps->chroma_qp_offset_list_enabled_flag)
702             sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
704             sh->cu_chroma_qp_offset_enabled_flag = 0;
/* deblocking parameters: slice override or PPS defaults */
706         if (s->ps.pps->deblocking_filter_control_present_flag) {
707             int deblocking_filter_override_flag = 0;
709             if (s->ps.pps->deblocking_filter_override_enabled_flag)
710                 deblocking_filter_override_flag = get_bits1(gb);
712             if (deblocking_filter_override_flag) {
713                 sh->disable_deblocking_filter_flag = get_bits1(gb);
714                 if (!sh->disable_deblocking_filter_flag) {
715                     sh->beta_offset = get_se_golomb(gb) * 2;
716                     sh->tc_offset   = get_se_golomb(gb) * 2;
719                 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
720                 sh->beta_offset = s->ps.pps->beta_offset;
721                 sh->tc_offset = s->ps.pps->tc_offset;
724             sh->disable_deblocking_filter_flag = 0;
729         if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
730             (sh->slice_sample_adaptive_offset_flag[0] ||
731              sh->slice_sample_adaptive_offset_flag[1] ||
732              !sh->disable_deblocking_filter_flag)) {
733             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
735             sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
/* a dependent segment cannot be the first thing we decode */
737     } else if (!s->slice_initialized) {
738         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
739         return AVERROR_INVALIDDATA;
/* entry points for tiles / wavefront parallel processing */
742     sh->num_entry_point_offsets = 0;
743     if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
744         unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
745         // It would be possible to bound this tighter but this here is simpler
746         if (num_entry_point_offsets > get_bits_left(gb)) {
747             av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
748             return AVERROR_INVALIDDATA;
751         sh->num_entry_point_offsets = num_entry_point_offsets;
752         if (sh->num_entry_point_offsets > 0) {
753             int offset_len = get_ue_golomb_long(gb) + 1;
755             if (offset_len < 1 || offset_len > 32) {
756                 sh->num_entry_point_offsets = 0;
757                 av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
758                 return AVERROR_INVALIDDATA;
761             av_freep(&sh->entry_point_offset);
762             av_freep(&sh->offset);
764             sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(unsigned));
765             sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
766             sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
767             if (!sh->entry_point_offset || !sh->offset || !sh->size) {
768                 sh->num_entry_point_offsets = 0;
769                 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
770                 return AVERROR(ENOMEM);
772             for (i = 0; i < sh->num_entry_point_offsets; i++) {
773                 unsigned val = get_bits_long(gb, offset_len);
774                 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
776             if (s->threads_number > 1 && (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1)) {
777                 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
778                 s->threads_number = 1;
780                 s->enable_parallel_tiles = 0;
782         s->enable_parallel_tiles = 0;
785     if (s->ps.pps->slice_header_extension_present_flag) {
786         unsigned int length = get_ue_golomb_long(gb);
/* 8LL keeps the bit-count comparison from overflowing 32 bits */
787         if (length*8LL > get_bits_left(gb)) {
788             av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
789             return AVERROR_INVALIDDATA;
791         for (i = 0; i < length; i++)
792             skip_bits(gb, 8);  // slice_header_extension_data_byte
795     // Inferred parameters
796     sh->slice_qp = 26U + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
797     if (sh->slice_qp > 51 ||
798         sh->slice_qp < -s->ps.sps->qp_bd_offset) {
799         av_log(s->avctx, AV_LOG_ERROR,
800                "The slice_qp %d is outside the valid range "
803                -s->ps.sps->qp_bd_offset);
804         return AVERROR_INVALIDDATA;
807     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
809     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
810         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
811         return AVERROR_INVALIDDATA;
814     if (get_bits_left(gb) < 0) {
815         av_log(s->avctx, AV_LOG_ERROR,
816                "Overread slice header by %d bits\n", -get_bits_left(gb));
817         return AVERROR_INVALIDDATA;
820     s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
822     if (!s->ps.pps->cu_qp_delta_enabled_flag)
823         s->HEVClc->qp_y = s->sh.slice_qp;
825     s->slice_initialized = 1;
826     s->HEVClc->tu.cu_qp_offset_cb = 0;
827     s->HEVClc->tu.cu_qp_offset_cr = 0;
829     s->no_rasl_output_flag = IS_IDR(s) || IS_BLA(s) || (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos);
/* Address the entry for CTB (x, y) in a raster-ordered per-CTB table. */
834 #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
/* Fill one SAO parameter of the current CTB: decode it when no merge flag
 * is set, otherwise copy it from the left (rx-1) or above (ry-1) CTB.
 * Relies on sao, sao_merge_left_flag, sao_merge_up_flag, rx and ry being
 * in scope at the call site (see hls_sao_param()). */
836 #define SET_SAO(elem, value)                            \
838     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
840     else if (sao_merge_left_flag)                       \
841         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
842     else if (sao_merge_up_flag)                         \
843         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
/* Decode (via CABAC) the SAO parameters for the CTB at (rx, ry) into
 * CTB(s->sao, rx, ry), handling merge-left / merge-up inheritance and the
 * inferred offset values (spec sao() syntax). */
848 static void hls_sao_param(HEVCContext *s, int rx, int ry)
850     HEVCLocalContext *lc    = s->HEVClc;
851     int sao_merge_left_flag = 0;
852     int sao_merge_up_flag   = 0;
853     SAOParams *sao          = &CTB(s->sao, rx, ry);
/* merge flags are only present when SAO is on for at least one component */
856     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
857         s->sh.slice_sample_adaptive_offset_flag[1]) {
859         if (lc->ctb_left_flag)
860             sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
862         if (ry > 0 && !sao_merge_left_flag) {
864                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
868     for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
869         int log2_sao_offset_scale = c_idx == 0 ? s->ps.pps->log2_sao_offset_scale_luma :
870                                                  s->ps.pps->log2_sao_offset_scale_chroma;
872         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
873             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
/* Cr (c_idx == 2) reuses the type and EO class signalled for Cb */
878             sao->type_idx[2] = sao->type_idx[1];
879             sao->eo_class[2] = sao->eo_class[1];
881         SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
884         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
887         for (i = 0; i < 4; i++)
888             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
/* band offsets carry explicit signs; edge offsets have implicit signs */
890         if (sao->type_idx[c_idx] == SAO_BAND) {
891             for (i = 0; i < 4; i++) {
892                 if (sao->offset_abs[c_idx][i]) {
893                     SET_SAO(offset_sign[c_idx][i],
894                             ff_hevc_sao_offset_sign_decode(s));
896                     sao->offset_sign[c_idx][i] = 0;
899             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
900         } else if (c_idx != 2) {
901             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
904         // Inferred parameters
905         sao->offset_val[c_idx][0] = 0;
906         for (i = 0; i < 4; i++) {
907             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
908             if (sao->type_idx[c_idx] == SAO_EDGE) {
910                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
911             } else if (sao->offset_sign[c_idx][i]) {
912                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
914             sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale;
/* Decode the cross-component prediction scale for chroma component idx
 * (0 = Cb, 1 = Cr) into lc->tu.res_scale_val; the value is
 * +/- 2^(log2_res_scale_abs_plus1 - 1), or 0 when not signalled. */
922 static int hls_cross_component_pred(HEVCContext *s, int idx) {
923     HEVCLocalContext *lc    = s->HEVClc;
924     int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
926     if (log2_res_scale_abs_plus1 !=  0) {
927         int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
/* (1 - 2*sign) maps the sign flag to +1 / -1 */
928         lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
929                                (1 - 2 * res_scale_sign_flag);
931         lc->tu.res_scale_val = 0;
938 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
939 int xBase, int yBase, int cb_xBase, int cb_yBase,
940 int log2_cb_size, int log2_trafo_size,
941 int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
943 HEVCLocalContext *lc = s->HEVClc;
944 const int log2_trafo_size_c = log2_trafo_size - s->ps.sps->hshift[1];
947 if (lc->cu.pred_mode == MODE_INTRA) {
948 int trafo_size = 1 << log2_trafo_size;
949 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
951 s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
954 if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
955 (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
956 int scan_idx = SCAN_DIAG;
957 int scan_idx_c = SCAN_DIAG;
958 int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
959 (s->ps.sps->chroma_format_idc == 2 &&
960 (cbf_cb[1] || cbf_cr[1]));
962 if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
963 lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
964 if (lc->tu.cu_qp_delta != 0)
965 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
966 lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
967 lc->tu.is_cu_qp_delta_coded = 1;
969 if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
970 lc->tu.cu_qp_delta > (25 + s->ps.sps->qp_bd_offset / 2)) {
971 av_log(s->avctx, AV_LOG_ERROR,
972 "The cu_qp_delta %d is outside the valid range "
975 -(26 + s->ps.sps->qp_bd_offset / 2),
976 (25 + s->ps.sps->qp_bd_offset / 2));
977 return AVERROR_INVALIDDATA;
980 ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
983 if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
984 !lc->cu.cu_transquant_bypass_flag && !lc->tu.is_cu_chroma_qp_offset_coded) {
985 int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
986 if (cu_chroma_qp_offset_flag) {
987 int cu_chroma_qp_offset_idx = 0;
988 if (s->ps.pps->chroma_qp_offset_list_len_minus1 > 0) {
989 cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
990 av_log(s->avctx, AV_LOG_ERROR,
991 "cu_chroma_qp_offset_idx not yet tested.\n");
993 lc->tu.cu_qp_offset_cb = s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
994 lc->tu.cu_qp_offset_cr = s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
996 lc->tu.cu_qp_offset_cb = 0;
997 lc->tu.cu_qp_offset_cr = 0;
999 lc->tu.is_cu_chroma_qp_offset_coded = 1;
1002 if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
1003 if (lc->tu.intra_pred_mode >= 6 &&
1004 lc->tu.intra_pred_mode <= 14) {
1005 scan_idx = SCAN_VERT;
1006 } else if (lc->tu.intra_pred_mode >= 22 &&
1007 lc->tu.intra_pred_mode <= 30) {
1008 scan_idx = SCAN_HORIZ;
1011 if (lc->tu.intra_pred_mode_c >= 6 &&
1012 lc->tu.intra_pred_mode_c <= 14) {
1013 scan_idx_c = SCAN_VERT;
1014 } else if (lc->tu.intra_pred_mode_c >= 22 &&
1015 lc->tu.intra_pred_mode_c <= 30) {
1016 scan_idx_c = SCAN_HORIZ;
1020 lc->tu.cross_pf = 0;
1023 ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1024 if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1025 int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1026 int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1027 lc->tu.cross_pf = (s->ps.pps->cross_component_prediction_enabled_flag && cbf_luma &&
1028 (lc->cu.pred_mode == MODE_INTER ||
1029 (lc->tu.chroma_mode_c == 4)));
1031 if (lc->tu.cross_pf) {
1032 hls_cross_component_pred(s, 0);
1034 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1035 if (lc->cu.pred_mode == MODE_INTRA) {
1036 ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1037 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
1040 ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1041 log2_trafo_size_c, scan_idx_c, 1);
1043 if (lc->tu.cross_pf) {
1044 ptrdiff_t stride = s->frame->linesize[1];
1045 int hshift = s->ps.sps->hshift[1];
1046 int vshift = s->ps.sps->vshift[1];
1047 int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1048 int16_t *coeffs = (int16_t*)lc->edge_emu_buffer2;
1049 int size = 1 << log2_trafo_size_c;
1051 uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1052 ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1053 for (i = 0; i < (size * size); i++) {
1054 coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1056 s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
1060 if (lc->tu.cross_pf) {
1061 hls_cross_component_pred(s, 1);
1063 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1064 if (lc->cu.pred_mode == MODE_INTRA) {
1065 ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1066 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1069 ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1070 log2_trafo_size_c, scan_idx_c, 2);
1072 if (lc->tu.cross_pf) {
1073 ptrdiff_t stride = s->frame->linesize[2];
1074 int hshift = s->ps.sps->hshift[2];
1075 int vshift = s->ps.sps->vshift[2];
1076 int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1077 int16_t *coeffs = (int16_t*)lc->edge_emu_buffer2;
1078 int size = 1 << log2_trafo_size_c;
1080 uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1081 ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1082 for (i = 0; i < (size * size); i++) {
1083 coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1085 s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
1088 } else if (s->ps.sps->chroma_format_idc && blk_idx == 3) {
1089 int trafo_size_h = 1 << (log2_trafo_size + 1);
1090 int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1091 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1092 if (lc->cu.pred_mode == MODE_INTRA) {
1093 ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1094 trafo_size_h, trafo_size_v);
1095 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1098 ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1099 log2_trafo_size, scan_idx_c, 1);
1101 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1102 if (lc->cu.pred_mode == MODE_INTRA) {
1103 ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1104 trafo_size_h, trafo_size_v);
1105 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1108 ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1109 log2_trafo_size, scan_idx_c, 2);
1112 } else if (s->ps.sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
1113 if (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3) {
1114 int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1115 int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1116 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1117 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1118 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1119 if (s->ps.sps->chroma_format_idc == 2) {
1120 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1121 trafo_size_h, trafo_size_v);
1122 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1123 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1125 } else if (blk_idx == 3) {
1126 int trafo_size_h = 1 << (log2_trafo_size + 1);
1127 int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1128 ff_hevc_set_neighbour_available(s, xBase, yBase,
1129 trafo_size_h, trafo_size_v);
1130 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1131 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1132 if (s->ps.sps->chroma_format_idc == 2) {
1133 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1134 trafo_size_h, trafo_size_v);
1135 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1136 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
1144 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1146 int cb_size = 1 << log2_cb_size;
1147 int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1149 int min_pu_width = s->ps.sps->min_pu_width;
1150 int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1151 int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1154 for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1155 for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1156 s->is_pcm[i + j * min_pu_width] = 2;
/* Parse one node of the residual quad-tree (RQT): select the intra prediction
 * modes for this depth, decide whether the transform splits, read the chroma
 * coded-block flags, then either recurse into the four children or decode the
 * leaf transform unit and update the deblocking metadata.
 * NOTE(review): this extract omits some lines of the original function
 * (local declarations such as cbf_cb/cbf_cr/ret, closing braces, and the
 * per-call error checks) — consult the full file before modifying. */
1159 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1160 int xBase, int yBase, int cb_xBase, int cb_yBase,
1161 int log2_cb_size, int log2_trafo_size,
1162 int trafo_depth, int blk_idx,
1163 const int *base_cbf_cb, const int *base_cbf_cr)
1165 HEVCLocalContext *lc = s->HEVClc;
1166 uint8_t split_transform_flag;
/* inherit the parent's chroma CBFs; index [1] is the second field used for 4:2:2 */
1171 cbf_cb[0] = base_cbf_cb[0];
1172 cbf_cb[1] = base_cbf_cb[1];
1173 cbf_cr[0] = base_cbf_cr[0];
1174 cbf_cr[1] = base_cbf_cr[1];
/* With intra NxN split, each depth-1 child carries its own luma mode;
 * chroma follows per-block only in 4:4:4 (chroma_format_idc == 3). */
1176 if (lc->cu.intra_split_flag) {
1177 if (trafo_depth == 1) {
1178 lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1179 if (s->ps.sps->chroma_format_idc == 3) {
1180 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1181 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[blk_idx];
1183 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1184 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0];
1188 lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[0];
1189 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1190 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0];
/* split_transform_flag is explicitly coded only when the size/depth
 * constraints leave a real choice; otherwise it is inferred below. */
1193 if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1194 log2_trafo_size > s->ps.sps->log2_min_tb_size &&
1195 trafo_depth < lc->cu.max_trafo_depth &&
1196 !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1197 split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1199 int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1200 lc->cu.pred_mode == MODE_INTER &&
1201 lc->cu.part_mode != PART_2Nx2N &&
/* inferred split: too large a TB, intra NxN at the root, or the inter-split rule */
1204 split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1205 (lc->cu.intra_split_flag && trafo_depth == 0) ||
/* Chroma CBFs exist unless chroma TB would fall below 4x4 (except 4:4:4);
 * a flag is re-read at this depth only if the parent's flag was set. */
1209 if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1210 if (trafo_depth == 0 || cbf_cb[0]) {
1211 cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
/* 4:2:2 leaf TBs carry a second (lower-half) chroma CBF */
1212 if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1213 cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1217 if (trafo_depth == 0 || cbf_cr[0]) {
1218 cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1219 if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1220 cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
/* recurse into the four half-size children in z-scan order */
1225 if (split_transform_flag) {
1226 const int trafo_size_split = 1 << (log2_trafo_size - 1);
1227 const int x1 = x0 + trafo_size_split;
1228 const int y1 = y0 + trafo_size_split;
1230 #define SUBDIVIDE(x, y, idx) \
1232 ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1233 log2_trafo_size - 1, trafo_depth + 1, idx, \
1239 SUBDIVIDE(x0, y0, 0);
1240 SUBDIVIDE(x1, y0, 1);
1241 SUBDIVIDE(x0, y1, 2);
1242 SUBDIVIDE(x1, y1, 3);
/* leaf: decode the transform unit itself */
1246 int min_tu_size = 1 << s->ps.sps->log2_min_tb_size;
1247 int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1248 int min_tu_width = s->ps.sps->min_tb_width;
/* cbf_luma is coded unless it is inferred to 1 (inter root with no chroma CBFs) */
1251 if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1252 cbf_cb[0] || cbf_cr[0] ||
1253 (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1254 cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1257 ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1258 log2_cb_size, log2_trafo_size,
1259 blk_idx, cbf_luma, cbf_cb, cbf_cr);
1262 // TODO: store cbf_luma somewhere else
/* record cbf_luma for every min-TU cell covered by this TB (used by deblocking) */
1265 for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1266 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1267 int x_tu = (x0 + j) >> log2_min_tu_size;
1268 int y_tu = (y0 + i) >> log2_min_tu_size;
1269 s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1272 if (!s->sh.disable_deblocking_filter_flag) {
1273 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1274 if (s->ps.pps->transquant_bypass_enable_flag &&
1275 lc->cu.cu_transquant_bypass_flag)
1276 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
/* Decode a pcm_sample() block: raw (uncompressed) luma and chroma samples
 * are read directly from the bitstream — no prediction or transform —
 * and written straight into the current frame via put_pcm().
 * NOTE(review): the GetBitContext declaration and the error-return checks
 * are outside this extract. */
1282 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1284 HEVCLocalContext *lc = s->HEVClc;
1286 int cb_size = 1 << log2_cb_size;
1287 ptrdiff_t stride0 = s->frame->linesize[0];
1288 ptrdiff_t stride1 = s->frame->linesize[1];
1289 ptrdiff_t stride2 = s->frame->linesize[2];
/* destination pointers for Y/Cb/Cr at (x0, y0), adjusted for chroma subsampling */
1290 uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1291 uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1292 uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
/* total payload size in bits: luma plane plus both (subsampled) chroma planes */
1294 int length = cb_size * cb_size * s->ps.sps->pcm.bit_depth +
1295 (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) +
1296 ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) *
1297 s->ps.sps->pcm.bit_depth_chroma;
/* PCM data is byte-aligned in the CABAC stream; skip past it there */
1298 const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1301 if (!s->sh.disable_deblocking_filter_flag)
1302 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1304 ret = init_get_bits(&gb, pcm, length);
1308 s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size, &gb, s->ps.sps->pcm.bit_depth);
1309 if (s->ps.sps->chroma_format_idc) {
1310 s->hevcdsp.put_pcm(dst1, stride1,
1311 cb_size >> s->ps.sps->hshift[1],
1312 cb_size >> s->ps.sps->vshift[1],
1313 &gb, s->ps.sps->pcm.bit_depth_chroma);
1314 s->hevcdsp.put_pcm(dst2, stride2,
1315 cb_size >> s->ps.sps->hshift[2],
1316 cb_size >> s->ps.sps->vshift[2],
1317 &gb, s->ps.sps->pcm.bit_depth_chroma);
1324 * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1326 * @param s HEVC decoding context
1327 * @param dst target buffer for block data at block position
1328 * @param dststride stride of the dst buffer
1329 * @param ref reference picture buffer at origin (0, 0)
1330 * @param mv motion vector (relative to block position) to get pixel data from
1331 * @param x_off horizontal position of block from origin (0, 0)
1332 * @param y_off vertical position of block from origin (0, 0)
1333 * @param block_w width of block
1334 * @param block_h height of block
1335 * @param luma_weight weighting factor applied to the luma prediction
1336 * @param luma_offset additive offset applied to the luma prediction value
/* Unidirectional luma motion compensation: apply the quarter-pel motion
 * vector, run edge emulation if the interpolation window leaves the
 * picture, then call the (optionally weighted) qpel DSP routine. */
1339 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1340 AVFrame *ref, const Mv *mv, int x_off, int y_off,
1341 int block_w, int block_h, int luma_weight, int luma_offset)
1343 HEVCLocalContext *lc = s->HEVClc;
1344 uint8_t *src = ref->data[0];
1345 ptrdiff_t srcstride = ref->linesize[0];
1346 int pic_width = s->ps.sps->width;
1347 int pic_height = s->ps.sps->height;
/* explicit weighted prediction is signalled per slice type in the PPS */
1350 int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1351 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1352 int idx = ff_hevc_pel_weight[block_w];
/* integer part of the quarter-pel mv moves the source position ... */
1354 x_off += mv->x >> 2;
1355 y_off += mv->y >> 2;
1356 src += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
/* ... and if the 7/8-tap filter footprint crosses the picture border,
 * build a padded copy of the source block in the edge-emulation buffer */
1358 if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1359 x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1360 y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1361 const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1362 int offset = QPEL_EXTRA_BEFORE * srcstride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1363 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1365 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1366 edge_emu_stride, srcstride,
1367 block_w + QPEL_EXTRA,
1368 block_h + QPEL_EXTRA,
1369 x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1370 pic_width, pic_height);
1371 src = lc->edge_emu_buffer + buf_offset;
1372 srcstride = edge_emu_stride;
/* mx/my select the sub-pel filter phase (declarations are outside this extract) */
1376 s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1377 block_h, mx, my, block_w);
1379 s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1380 block_h, s->sh.luma_log2_weight_denom,
1381 luma_weight, luma_offset, mx, my, block_w);
1385 * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1387 * @param s HEVC decoding context
1388 * @param dst target buffer for block data at block position
1389 * @param dststride stride of the dst buffer
1390 * @param ref0 reference picture0 buffer at origin (0, 0)
1391 * @param mv0 motion vector0 (relative to block position) to get pixel data from
1392 * @param x_off horizontal position of block from origin (0, 0)
1393 * @param y_off vertical position of block from origin (0, 0)
1394 * @param block_w width of block
1395 * @param block_h height of block
1396 * @param ref1 reference picture1 buffer at origin (0, 0)
1397 * @param mv1 motion vector1 (relative to block position) to get pixel data from
1398 * @param current_mv current motion vector structure
/* Bidirectional luma motion compensation: interpolate the L0 prediction
 * into lc->tmp, then combine it with the L1 prediction using the bi
 * (averaging) or bi-weighted DSP routine. Edge emulation is applied
 * independently per reference, using two separate scratch buffers. */
1400 static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1401 AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1402 int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1404 HEVCLocalContext *lc = s->HEVClc;
1405 ptrdiff_t src0stride = ref0->linesize[0];
1406 ptrdiff_t src1stride = ref1->linesize[0];
1407 int pic_width = s->ps.sps->width;
1408 int pic_height = s->ps.sps->height;
/* fractional (quarter-pel) parts select the interpolation filter phase */
1409 int mx0 = mv0->x & 3;
1410 int my0 = mv0->y & 3;
1411 int mx1 = mv1->x & 3;
1412 int my1 = mv1->y & 3;
1413 int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1414 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
/* integer parts shift the fetch position inside each reference */
1415 int x_off0 = x_off + (mv0->x >> 2);
1416 int y_off0 = y_off + (mv0->y >> 2);
1417 int x_off1 = x_off + (mv1->x >> 2);
1418 int y_off1 = y_off + (mv1->y >> 2);
1419 int idx = ff_hevc_pel_weight[block_w];
/* unsigned cast avoids UB when shifting a possibly-negative offset */
1421 uint8_t *src0 = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1422 uint8_t *src1 = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
/* pad the L0 source block when the filter footprint leaves the picture */
1424 if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1425 x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1426 y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1427 const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1428 int offset = QPEL_EXTRA_BEFORE * src0stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1429 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1431 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1432 edge_emu_stride, src0stride,
1433 block_w + QPEL_EXTRA,
1434 block_h + QPEL_EXTRA,
1435 x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1436 pic_width, pic_height);
1437 src0 = lc->edge_emu_buffer + buf_offset;
1438 src0stride = edge_emu_stride;
/* same for the L1 source block, into the second emulation buffer */
1441 if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1442 x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1443 y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1444 const ptrdiff_t edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1445 int offset = QPEL_EXTRA_BEFORE * src1stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1446 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1448 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1449 edge_emu_stride, src1stride,
1450 block_w + QPEL_EXTRA,
1451 block_h + QPEL_EXTRA,
1452 x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1453 pic_width, pic_height);
1454 src1 = lc->edge_emu_buffer2 + buf_offset;
1455 src1stride = edge_emu_stride;
/* L0 prediction goes to the intermediate 16-bit buffer ... */
1458 s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
1459 block_h, mx0, my0, block_w);
/* ... then the bi routine merges it with the L1 prediction into dst */
1461 s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1462 block_h, mx1, my1, block_w);
1464 s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1465 block_h, s->sh.luma_log2_weight_denom,
1466 s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1467 s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1468 s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1469 s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1475 * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1477 * @param s HEVC decoding context
1478 * @param dst0 target buffer for block data at block position (one chroma plane per call)
1480 * @param dststride stride of the dst0 buffer
1481 * @param ref reference picture buffer at origin (0, 0)
1482 * @param mv motion vector (relative to block position) to get pixel data from
1483 * @param x_off horizontal position of block from origin (0, 0)
1484 * @param y_off vertical position of block from origin (0, 0)
1485 * @param block_w width of block
1486 * @param block_h height of block
1487 * @param chroma_weight weighting factor applied to the chroma prediction
1488 * @param chroma_offset additive offset applied to the chroma prediction value
1491 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1492 ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1493 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1495 HEVCLocalContext *lc = s->HEVClc;
1496 int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1];
1497 int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1];
1498 const Mv *mv = ¤t_mv->mv[reflist];
1499 int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1500 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1501 int idx = ff_hevc_pel_weight[block_w];
1502 int hshift = s->ps.sps->hshift[1];
1503 int vshift = s->ps.sps->vshift[1];
1504 intptr_t mx = av_mod_uintp2(mv->x, 2 + hshift);
1505 intptr_t my = av_mod_uintp2(mv->y, 2 + vshift);
1506 intptr_t _mx = mx << (1 - hshift);
1507 intptr_t _my = my << (1 - vshift);
1509 x_off += mv->x >> (2 + hshift);
1510 y_off += mv->y >> (2 + vshift);
1511 src0 += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1513 if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1514 x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1515 y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1516 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1517 int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->ps.sps->pixel_shift));
1518 int buf_offset0 = EPEL_EXTRA_BEFORE *
1519 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1520 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1521 edge_emu_stride, srcstride,
1522 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1523 x_off - EPEL_EXTRA_BEFORE,
1524 y_off - EPEL_EXTRA_BEFORE,
1525 pic_width, pic_height);
1527 src0 = lc->edge_emu_buffer + buf_offset0;
1528 srcstride = edge_emu_stride;
1531 s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1532 block_h, _mx, _my, block_w);
1534 s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1535 block_h, s->sh.chroma_log2_weight_denom,
1536 chroma_weight, chroma_offset, _mx, _my, block_w);
1540 * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1542 * @param s HEVC decoding context
1543 * @param dst0 target buffer for block data at block position
1544 * @param dststride stride of the dst0 buffer
1545 * @param ref0 reference picture0 buffer at origin (0, 0)
1547 * @param x_off horizontal position of block from origin (0, 0)
1548 * @param y_off vertical position of block from origin (0, 0)
1549 * @param block_w width of block
1550 * @param block_h height of block
1551 * @param ref1 reference picture1 buffer at origin (0, 0)
1553 * @param current_mv current motion vector structure (carries both motion vectors and reference indices)
1554 * @param cidx chroma component (0 = cb, 1 = cr)
1556 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1557 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1559 HEVCLocalContext *lc = s->HEVClc;
1560 uint8_t *src1 = ref0->data[cidx+1];
1561 uint8_t *src2 = ref1->data[cidx+1];
1562 ptrdiff_t src1stride = ref0->linesize[cidx+1];
1563 ptrdiff_t src2stride = ref1->linesize[cidx+1];
1564 int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1565 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1566 int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1];
1567 int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1];
1568 Mv *mv0 = ¤t_mv->mv[0];
1569 Mv *mv1 = ¤t_mv->mv[1];
1570 int hshift = s->ps.sps->hshift[1];
1571 int vshift = s->ps.sps->vshift[1];
1573 intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift);
1574 intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift);
1575 intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift);
1576 intptr_t my1 = av_mod_uintp2(mv1->y, 2 + vshift);
1577 intptr_t _mx0 = mx0 << (1 - hshift);
1578 intptr_t _my0 = my0 << (1 - vshift);
1579 intptr_t _mx1 = mx1 << (1 - hshift);
1580 intptr_t _my1 = my1 << (1 - vshift);
1582 int x_off0 = x_off + (mv0->x >> (2 + hshift));
1583 int y_off0 = y_off + (mv0->y >> (2 + vshift));
1584 int x_off1 = x_off + (mv1->x >> (2 + hshift));
1585 int y_off1 = y_off + (mv1->y >> (2 + vshift));
1586 int idx = ff_hevc_pel_weight[block_w];
1587 src1 += y_off0 * src1stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1588 src2 += y_off1 * src2stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1590 if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1591 x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1592 y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1593 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1594 int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1595 int buf_offset1 = EPEL_EXTRA_BEFORE *
1596 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1598 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1599 edge_emu_stride, src1stride,
1600 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1601 x_off0 - EPEL_EXTRA_BEFORE,
1602 y_off0 - EPEL_EXTRA_BEFORE,
1603 pic_width, pic_height);
1605 src1 = lc->edge_emu_buffer + buf_offset1;
1606 src1stride = edge_emu_stride;
1609 if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1610 x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1611 y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1612 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1613 int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1614 int buf_offset1 = EPEL_EXTRA_BEFORE *
1615 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1617 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1618 edge_emu_stride, src2stride,
1619 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1620 x_off1 - EPEL_EXTRA_BEFORE,
1621 y_off1 - EPEL_EXTRA_BEFORE,
1622 pic_width, pic_height);
1624 src2 = lc->edge_emu_buffer2 + buf_offset1;
1625 src2stride = edge_emu_stride;
1628 s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1629 block_h, _mx0, _my0, block_w);
1631 s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1632 src2, src2stride, lc->tmp,
1633 block_h, _mx1, _my1, block_w);
1635 s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1636 src2, src2stride, lc->tmp,
1638 s->sh.chroma_log2_weight_denom,
1639 s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1640 s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1641 s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1642 s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1643 _mx1, _my1, block_w);
1646 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1647 const Mv *mv, int y0, int height)
1649 int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1651 if (s->threads_type == FF_THREAD_FRAME )
1652 ff_thread_await_progress(&ref->tf, y, 0);
/* AMVP (non-merge) motion decoding for one PU: parse the inter prediction
 * direction, then for each active list (L0/L1) read the reference index,
 * the motion vector difference and the MVP flag, derive the predictor, and
 * add the difference. The CABAC read order here is normative — do not
 * reorder the ff_hevc_* decode calls. */
1655 static void hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
1656 int nPbH, int log2_cb_size, int part_idx,
1657 int merge_idx, MvField *mv)
1659 HEVCLocalContext *lc = s->HEVClc;
1660 enum InterPredIdc inter_pred_idc = PRED_L0;
1663 ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
/* only B slices can signal a direction other than L0 */
1665 if (s->sh.slice_type == B_SLICE)
1666 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
/* list 0 (taken for PRED_L0 and PRED_BI) */
1668 if (inter_pred_idc != PRED_L1) {
1669 if (s->sh.nb_refs[L0])
1670 mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1672 mv->pred_flag = PF_L0;
1673 ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1674 mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1675 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1676 part_idx, merge_idx, mv, mvp_flag, 0);
/* final mv = predictor + decoded difference */
1677 mv->mv[0].x += lc->pu.mvd.x;
1678 mv->mv[0].y += lc->pu.mvd.y;
/* list 1 (taken for PRED_L1 and PRED_BI) */
1681 if (inter_pred_idc != PRED_L0) {
1682 if (s->sh.nb_refs[L1])
1683 mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
/* mvd_l1_zero_flag forces a zero L1 difference for bi-predicted PUs */
1685 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1686 AV_ZERO32(&lc->pu.mvd);
1688 ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1691 mv->pred_flag += PF_L1;
1692 mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1693 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1694 part_idx, merge_idx, mv, mvp_flag, 1);
1695 mv->mv[1].x += lc->pu.mvd.x;
1696 mv->mv[1].y += lc->pu.mvd.y;
1700 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1702 int log2_cb_size, int partIdx, int idx)
1704 #define POS(c_idx, x, y) \
1705 &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1706 (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1707 HEVCLocalContext *lc = s->HEVClc;
1709 struct MvField current_mv = {{{ 0 }}};
1711 int min_pu_width = s->ps.sps->min_pu_width;
1713 MvField *tab_mvf = s->ref->tab_mvf;
1714 RefPicList *refPicList = s->ref->refPicList;
1715 HEVCFrame *ref0 = NULL, *ref1 = NULL;
1716 uint8_t *dst0 = POS(0, x0, y0);
1717 uint8_t *dst1 = POS(1, x0, y0);
1718 uint8_t *dst2 = POS(2, x0, y0);
1719 int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1720 int min_cb_width = s->ps.sps->min_cb_width;
1721 int x_cb = x0 >> log2_min_cb_size;
1722 int y_cb = y0 >> log2_min_cb_size;
1726 int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1729 lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1731 if (skip_flag || lc->pu.merge_flag) {
1732 if (s->sh.max_num_merge_cand > 1)
1733 merge_idx = ff_hevc_merge_idx_decode(s);
1737 ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1738 partIdx, merge_idx, ¤t_mv);
1740 hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1741 partIdx, merge_idx, ¤t_mv);
1744 x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1745 y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1747 for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1748 for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1749 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1751 if (current_mv.pred_flag & PF_L0) {
1752 ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1755 hevc_await_progress(s, ref0, ¤t_mv.mv[0], y0, nPbH);
1757 if (current_mv.pred_flag & PF_L1) {
1758 ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1761 hevc_await_progress(s, ref1, ¤t_mv.mv[1], y0, nPbH);
1764 if (current_mv.pred_flag == PF_L0) {
1765 int x0_c = x0 >> s->ps.sps->hshift[1];
1766 int y0_c = y0 >> s->ps.sps->vshift[1];
1767 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1768 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1770 luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1771 ¤t_mv.mv[0], x0, y0, nPbW, nPbH,
1772 s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1773 s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1775 if (s->ps.sps->chroma_format_idc) {
1776 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1777 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1778 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1779 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1780 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1781 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1783 } else if (current_mv.pred_flag == PF_L1) {
1784 int x0_c = x0 >> s->ps.sps->hshift[1];
1785 int y0_c = y0 >> s->ps.sps->vshift[1];
1786 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1787 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1789 luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1790 ¤t_mv.mv[1], x0, y0, nPbW, nPbH,
1791 s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1792 s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1794 if (s->ps.sps->chroma_format_idc) {
1795 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1796 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1797 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1799 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1800 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1801 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1803 } else if (current_mv.pred_flag == PF_BI) {
1804 int x0_c = x0 >> s->ps.sps->hshift[1];
1805 int y0_c = y0 >> s->ps.sps->vshift[1];
1806 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1807 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1809 luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1810 ¤t_mv.mv[0], x0, y0, nPbW, nPbH,
1811 ref1->frame, ¤t_mv.mv[1], ¤t_mv);
1813 if (s->ps.sps->chroma_format_idc) {
1814 chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1815 x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 0);
1817 chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1818 x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 1);
/* Derive the luma intra prediction mode for one PU from the three most
 * probable modes (MPM) built from the left and above neighbours, then
 * record the mode and the PF_INTRA flag in the per-min-PU tables.
 * Returns the derived intra prediction mode. */
1826 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1827 int prev_intra_luma_pred_flag)
1829 HEVCLocalContext *lc = s->HEVClc;
1830 int x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1831 int y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1832 int min_pu_width = s->ps.sps->min_pu_width;
1833 int size_in_pus = pu_size >> s->ps.sps->log2_min_pu_size;
/* position within the CTB, used to test availability of neighbours */
1834 int x0b = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
1835 int y0b = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
/* unavailable neighbours default to INTRA_DC */
1837 int cand_up = (lc->ctb_up_flag || y0b) ?
1838 s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1839 int cand_left = (lc->ctb_left_flag || x0b) ?
1840 s->tab_ipm[y_pu * min_pu_width + x_pu - 1] : INTRA_DC;
1842 int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1844 MvField *tab_mvf = s->ref->tab_mvf;
1845 int intra_pred_mode;
1849 // intra_pred_mode prediction does not cross vertical CTB boundaries
1850 if ((y0 - 1) < y_ctb)
/* build the 3-entry MPM candidate list */
1853 if (cand_left == cand_up) {
1854 if (cand_left < 2) {
/* both non-angular: use the fixed PLANAR/DC/vertical-26 set */
1855 candidate[0] = INTRA_PLANAR;
1856 candidate[1] = INTRA_DC;
1857 candidate[2] = INTRA_ANGULAR_26;
/* angular: the mode itself plus its two angular neighbours (mod 32) */
1859 candidate[0] = cand_left;
1860 candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1861 candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1864 candidate[0] = cand_left;
1865 candidate[1] = cand_up;
1866 if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1867 candidate[2] = INTRA_PLANAR;
1868 } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1869 candidate[2] = INTRA_DC;
1871 candidate[2] = INTRA_ANGULAR_26;
1875 if (prev_intra_luma_pred_flag) {
/* mode is one of the MPMs, selected by mpm_idx */
1876 intra_pred_mode = candidate[lc->pu.mpm_idx];
/* otherwise sort the candidates and remap rem_intra_luma_pred_mode
 * past them — the swap order below is normative */
1878 if (candidate[0] > candidate[1])
1879 FFSWAP(uint8_t, candidate[0], candidate[1]);
1880 if (candidate[0] > candidate[2])
1881 FFSWAP(uint8_t, candidate[0], candidate[2]);
1882 if (candidate[1] > candidate[2])
1883 FFSWAP(uint8_t, candidate[1], candidate[2]);
1885 intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1886 for (i = 0; i < 3; i++)
1887 if (intra_pred_mode >= candidate[i])
1891 /* write the intra prediction units into the mv array */
1894 for (i = 0; i < size_in_pus; i++) {
1895 memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1896 intra_pred_mode, size_in_pus);
1898 for (j = 0; j < size_in_pus; j++) {
1899 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
1903 return intra_pred_mode;
/*
 * Record the coding-tree depth of this coding block in s->tab_ct_depth for
 * every min-CB cell it covers; presumably consumed when deriving contexts
 * for split_cu_flag decoding (see ff_hevc_split_coding_unit_flag_decode) —
 * confirm against the CABAC code.
 * NOTE(review): the trailing memset arguments (original line 1916) are not
 * visible in this excerpt.
 */
1906 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1907 int log2_cb_size, int ct_depth)
1909 int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
1910 int x_cb = x0 >> s->ps.sps->log2_min_cb_size;
1911 int y_cb = y0 >> s->ps.sps->log2_min_cb_size;
1914 for (y = 0; y < length; y++)
1915 memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
/* Chroma intra mode remapping table, used below when
 * chroma_format_idc == 2 (4:2:2): the derived mode is passed through this
 * table before being stored in intra_pred_mode_c. */
1919 static const uint8_t tab_mode_idx[] = {
1920 0, 1, 2, 2, 2, 2, 3, 5, 7, 8, 10, 12, 13, 15, 17, 18, 19, 20,
1921 21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31};
/*
 * Decode the intra prediction syntax for one CU: per-PB luma mode (one PB,
 * or four when part_mode is PART_NxN) followed by the chroma mode, whose
 * form depends on chroma_format_idc (4:4:4: one mode per PB; 4:2:2: single
 * mode remapped through tab_mode_idx; other non-monochrome: single mode).
 * NOTE(review): several closing braces / else lines are missing from this
 * excerpt.
 */
1923 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1926 HEVCLocalContext *lc = s->HEVClc;
/* signalled chroma mode indices 0..3 map to these base modes; 4 = derived */
1927 static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1928 uint8_t prev_intra_luma_pred_flag[4];
1929 int split = lc->cu.part_mode == PART_NxN;
1930 int pb_size = (1 << log2_cb_size) >> split;
1931 int side = split + 1;
/* first pass: read all prev_intra_luma_pred_flag bits */
1935 for (i = 0; i < side; i++)
1936 for (j = 0; j < side; j++)
1937 prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
/* second pass: mpm_idx or rem mode per PB, then derive the luma mode */
1939 for (i = 0; i < side; i++) {
1940 for (j = 0; j < side; j++) {
1941 if (prev_intra_luma_pred_flag[2 * i + j])
1942 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1944 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1946 lc->pu.intra_pred_mode[2 * i + j] =
1947 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1948 prev_intra_luma_pred_flag[2 * i + j]);
/* 4:4:4: one chroma mode per PB */
1952 if (s->ps.sps->chroma_format_idc == 3) {
1953 for (i = 0; i < side; i++) {
1954 for (j = 0; j < side; j++) {
1955 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1956 if (chroma_mode != 4) {
/* substitute mode 34 when the table entry collides with luma */
1957 if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
1958 lc->pu.intra_pred_mode_c[2 * i + j] = 34;
1960 lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
1962 lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
/* 4:2:2: single signalled mode, remapped through tab_mode_idx */
1966 } else if (s->ps.sps->chroma_format_idc == 2) {
1968 lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1969 if (chroma_mode != 4) {
1970 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1973 mode_idx = intra_chroma_table[chroma_mode];
1975 mode_idx = lc->pu.intra_pred_mode[0];
1977 lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
/* other non-monochrome formats: single mode, no remap */
1978 } else if (s->ps.sps->chroma_format_idc != 0) {
1979 chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1980 if (chroma_mode != 4) {
1981 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1982 lc->pu.intra_pred_mode_c[0] = 34;
1984 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
1986 lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
/*
 * Fill default intra state for a CU that carries no explicit intra syntax
 * (skip / PCM paths call this): set INTRA_DC in s->tab_ipm for the covered
 * min-PUs and, when the CU is intra-coded, flag the MvField entries as
 * PF_INTRA.
 */
1991 static void intra_prediction_unit_default_value(HEVCContext *s,
1995 HEVCLocalContext *lc = s->HEVClc;
1996 int pb_size = 1 << log2_cb_size;
1997 int size_in_pus = pb_size >> s->ps.sps->log2_min_pu_size;
1998 int min_pu_width = s->ps.sps->min_pu_width;
1999 MvField *tab_mvf = s->ref->tab_mvf;
2000 int x_pu = x0 >> s->ps.sps->log2_min_pu_size;
2001 int y_pu = y0 >> s->ps.sps->log2_min_pu_size;
/* guard for a CB smaller than one min PU.
 * NOTE(review): the guarded statement (original line 2005) is not visible
 * in this excerpt. */
2004 if (size_in_pus == 0)
2006 for (j = 0; j < size_in_pus; j++)
2007 memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
2008 if (lc->cu.pred_mode == MODE_INTRA)
2009 for (j = 0; j < size_in_pus; j++)
2010 for (k = 0; k < size_in_pus; k++)
2011 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
/*
 * Decode one coding unit (coding_unit() syntax of the HEVC spec): skip
 * flag, prediction mode, partitioning, the prediction units, optional PCM
 * samples, and the residual transform tree; finally update the per-CB QP
 * and coding-tree-depth maps.
 * NOTE(review): this excerpt has dropped lines (declarations such as
 * pcm_flag/ret, else branches and closing braces) — verify against the
 * full file.
 */
2014 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
2016 int cb_size = 1 << log2_cb_size;
2017 HEVCLocalContext *lc = s->HEVClc;
2018 int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
2019 int length = cb_size >> log2_min_cb_size;
2020 int min_cb_width = s->ps.sps->min_cb_width;
2021 int x_cb = x0 >> log2_min_cb_size;
2022 int y_cb = y0 >> log2_min_cb_size;
2023 int idx = log2_cb_size - 2;
/* mask selecting the position of this CB inside the QP-delta group */
2024 int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
/* defaults before parsing any CU syntax */
2029 lc->cu.pred_mode = MODE_INTRA;
2030 lc->cu.part_mode = PART_2Nx2N;
2031 lc->cu.intra_split_flag = 0;
2033 SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2034 for (x = 0; x < 4; x++)
2035 lc->pu.intra_pred_mode[x] = 1;
2036 if (s->ps.pps->transquant_bypass_enable_flag) {
2037 lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2038 if (lc->cu.cu_transquant_bypass_flag)
2039 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2041 lc->cu.cu_transquant_bypass_flag = 0;
/* cu_skip_flag is only present in non-I slices; propagate it to every
 * min-CB covered by this CU */
2043 if (s->sh.slice_type != I_SLICE) {
2044 uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2046 x = y_cb * min_cb_width + x_cb;
2047 for (y = 0; y < length; y++) {
2048 memset(&s->skip_flag[x], skip_flag, length);
2051 lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2053 x = y_cb * min_cb_width + x_cb;
2054 for (y = 0; y < length; y++) {
2055 memset(&s->skip_flag[x], 0, length);
/* skipped CU: a single merge-mode PU, no residual */
2060 if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2061 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2062 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2064 if (!s->sh.disable_deblocking_filter_flag)
2065 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2069 if (s->sh.slice_type != I_SLICE)
2070 lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
/* part_mode is only coded for inter CUs or min-size intra CUs */
2071 if (lc->cu.pred_mode != MODE_INTRA ||
2072 log2_cb_size == s->ps.sps->log2_min_cb_size) {
2073 lc->cu.part_mode = ff_hevc_part_mode_decode(s, log2_cb_size);
2074 lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2075 lc->cu.pred_mode == MODE_INTRA;
2078 if (lc->cu.pred_mode == MODE_INTRA) {
/* optional PCM path: raw samples, no prediction/transform */
2079 if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2080 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2081 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2082 pcm_flag = ff_hevc_pcm_flag_decode(s);
2085 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2086 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2087 if (s->ps.sps->pcm.loop_filter_disable_flag)
2088 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2093 intra_prediction_unit(s, x0, y0, log2_cb_size);
2096 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
/* inter CU: dispatch PUs according to the partition shape; the last
 * argument pair encodes (partition index, block-size table index) */
2097 switch (lc->cu.part_mode) {
2099 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2102 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 2, log2_cb_size, 0, idx);
2103 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2106 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2107 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2110 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 4, log2_cb_size, 0, idx);
2111 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2114 hls_prediction_unit(s, x0, y0, cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2115 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size / 4, log2_cb_size, 1, idx);
2118 hls_prediction_unit(s, x0, y0, cb_size / 4, cb_size, log2_cb_size, 0, idx - 2);
2119 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2122 hls_prediction_unit(s, x0, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2123 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size / 4, cb_size, log2_cb_size, 1, idx - 2);
2126 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2127 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2128 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2129 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
/* residual: rqt_root_cbf may be coded for non-merge inter CUs */
2135 int rqt_root_cbf = 1;
2137 if (lc->cu.pred_mode != MODE_INTRA &&
2138 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2139 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2142 const static int cbf[2] = { 0 };
2143 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2144 s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2145 s->ps.sps->max_transform_hierarchy_depth_inter;
2146 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2148 log2_cb_size, 0, 0, cbf, cbf);
2152 if (!s->sh.disable_deblocking_filter_flag)
2153 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
/* if no cu_qp_delta was coded in this CU, derive qPy now */
2158 if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2159 ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
2161 x = y_cb * min_cb_width + x_cb;
2162 for (y = 0; y < length; y++) {
2163 memset(&s->qp_y_tab[x], lc->qp_y, length);
/* end of a QP-delta group: latch the QP predictor */
2167 if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2168 ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2169 lc->qPy_pred = lc->qp_y;
2172 set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
/*
 * Recursively parse the coding quadtree: decode (or infer, at the picture
 * border) split_cu_flag, reset per-group QP state at QP-group boundaries,
 * recurse into the four sub-blocks, or decode a leaf coding unit and the
 * end_of_slice_flag.  Returns >0 while more CTB data follows, 0 at the end
 * of the slice segment, <0 on error.
 * NOTE(review): else branches, some declarations (split_cu, more_data,
 * ret) and closing braces are missing from this excerpt.
 */
2177 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2178 int log2_cb_size, int cb_depth)
2180 HEVCLocalContext *lc = s->HEVClc;
2181 const int cb_size = 1 << log2_cb_size;
2185 lc->ct_depth = cb_depth;
/* split_cu_flag is only coded when the block lies fully inside the
 * picture and is larger than the minimum CB */
2186 if (x0 + cb_size <= s->ps.sps->width &&
2187 y0 + cb_size <= s->ps.sps->height &&
2188 log2_cb_size > s->ps.sps->log2_min_cb_size) {
2189 split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
/* otherwise the split is inferred (forced at the border) */
2191 split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
/* entering a new QP-delta group: reset the delta-QP bookkeeping */
2193 if (s->ps.pps->cu_qp_delta_enabled_flag &&
2194 log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2195 lc->tu.is_cu_qp_delta_coded = 0;
2196 lc->tu.cu_qp_delta = 0;
2199 if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2200 log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_chroma_qp_offset_depth) {
2201 lc->tu.is_cu_chroma_qp_offset_coded = 0;
/* split: recurse into the four quadrants that lie inside the picture */
2205 int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2206 const int cb_size_split = cb_size >> 1;
2207 const int x1 = x0 + cb_size_split;
2208 const int y1 = y0 + cb_size_split;
2212 more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2216 if (more_data && x1 < s->ps.sps->width) {
2217 more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2221 if (more_data && y1 < s->ps.sps->height) {
2222 more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2226 if (more_data && x1 < s->ps.sps->width &&
2227 y1 < s->ps.sps->height) {
2228 more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
/* end of a QP-delta group: latch the QP predictor */
2233 if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2234 ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2235 lc->qPy_pred = lc->qp_y;
2238 return ((x1 + cb_size_split) < s->ps.sps->width ||
2239 (y1 + cb_size_split) < s->ps.sps->height);
/* leaf: decode the coding unit itself */
2243 ret = hls_coding_unit(s, x0, y0, log2_cb_size);
/* at the bottom-right edge of a CTB (or of the picture), the
 * end_of_slice_flag follows the CU */
2246 if ((!((x0 + cb_size) %
2247 (1 << (s->ps.sps->log2_ctb_size))) ||
2248 (x0 + cb_size >= s->ps.sps->width)) &&
2250 (1 << (s->ps.sps->log2_ctb_size))) ||
2251 (y0 + cb_size >= s->ps.sps->height))) {
2252 int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2253 return !end_of_slice_flag;
/*
 * Set up the per-CTB neighbourhood state before decoding a CTB: record the
 * slice address, compute the tile/WPP horizontal end position, and derive
 * the boundary flags and neighbour-availability flags (left/up/up-right/
 * up-left) used by prediction and the in-loop filters.
 * NOTE(review): some else lines and closing braces are missing from this
 * excerpt.
 */
2262 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2265 HEVCLocalContext *lc = s->HEVClc;
2266 int ctb_size = 1 << s->ps.sps->log2_ctb_size;
2267 int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2268 int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2270 s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
/* WPP: each CTB row restarts the QP group; rows span the full width */
2272 if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2273 if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2274 lc->first_qp_group = 1;
2275 lc->end_of_tiles_x = s->ps.sps->width;
/* tiles: when entering a new tile, recompute its right edge */
2276 } else if (s->ps.pps->tiles_enabled_flag) {
2277 if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2278 int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2279 lc->end_of_tiles_x = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2280 lc->first_qp_group = 1;
2283 lc->end_of_tiles_x = s->ps.sps->width;
2286 lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
/* boundary flags: tile and slice edges on the left/upper sides */
2288 lc->boundary_flags = 0;
2289 if (s->ps.pps->tiles_enabled_flag) {
2290 if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2291 lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2292 if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2293 lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2294 if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2295 lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2296 if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2297 lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2299 if (ctb_addr_in_slice <= 0)
2300 lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2301 if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2302 lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
/* neighbour availability combines picture position, slice position and
 * tile membership */
2305 lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2306 lc->ctb_up_flag = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2307 lc->ctb_up_right_flag = ((y_ctb > 0) && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2308 lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0) && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
/*
 * Single-threaded slice decoding entry point (run via avctx->execute):
 * walk the CTBs of the slice in tile-scan order, initializing CABAC and
 * neighbour state per CTB, decoding the coding quadtree, and running the
 * in-loop filters as rows complete.
 * NOTE(review): local declarations, loop increments and the function's
 * return statements are missing from this excerpt.
 */
2311 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2313 HEVCContext *s = avctxt->priv_data;
2314 int ctb_size = 1 << s->ps.sps->log2_ctb_size;
2318 int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
/* a dependent slice segment cannot be the first segment of the picture */
2320 if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2321 av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2322 return AVERROR_INVALIDDATA;
2325 if (s->sh.dependent_slice_segment_flag) {
2326 int prev_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2327 if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2328 av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2329 return AVERROR_INVALIDDATA;
2333 while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2334 int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2336 x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2337 y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2338 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2340 ff_hevc_cabac_init(s, ctb_addr_ts);
2342 hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
/* per-CTB deblocking parameters come from the slice header */
2344 s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2345 s->deblock[ctb_addr_rs].tc_offset = s->sh.tc_offset;
2346 s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag;
2348 more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
/* on error, invalidate the CTB's slice address so later passes skip it */
2349 if (more_data < 0) {
2350 s->tab_slice_address[ctb_addr_rs] = -1;
2356 ff_hevc_save_states(s, ctb_addr_ts);
2357 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
/* after the last CTB of the picture, run the final filter pass */
2360 if (x_ctb + ctb_size >= s->ps.sps->width &&
2361 y_ctb + ctb_size >= s->ps.sps->height)
2362 ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
/*
 * Decode the slice data on a single job by dispatching hls_decode_entry
 * through avctx->execute.
 * NOTE(review): the local declarations of arg/ret and the return statement
 * are missing from this excerpt.
 */
2367 static int hls_slice_data(HEVCContext *s)
2375 s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
/*
 * Wavefront (WPP) worker: decode one CTB row of the slice.  Each worker
 * gets its own HEVCContext copy (s1->sList[self_id]) and bitstream slice
 * (offset/size computed by hls_slice_data_wpp), waits on the row above via
 * ff_thread_await_progress2, and reports its own progress per CTB.  A
 * shared atomic wpp_err flag aborts all rows once any row fails.
 * NOTE(review): some declarations, increments and braces are missing from
 * this excerpt.
 */
2378 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2380 HEVCContext *s1 = avctxt->priv_data, *s;
2381 HEVCLocalContext *lc;
2382 int ctb_size = 1<< s1->ps.sps->log2_ctb_size;
2384 int *ctb_row_p = input_ctb_row;
2385 int ctb_row = ctb_row_p[job];
2386 int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->ps.sps->width + ctb_size - 1) >> s1->ps.sps->log2_ctb_size);
2387 int ctb_addr_ts = s1->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2388 int thread = ctb_row % s1->threads_number;
2391 s = s1->sList[self_id];
/* rows after the first start at their entry-point offset in the NAL */
2395 ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2399 ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2402 while(more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2403 int x_ctb = (ctb_addr_rs % s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2404 int y_ctb = (ctb_addr_rs / s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2406 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
/* wait until the row above is far enough ahead (CABAC dependency) */
2408 ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
2410 if (atomic_load(&s1->wpp_err)) {
2411 ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2415 ff_hevc_cabac_init(s, ctb_addr_ts);
2416 hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2417 more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
/* decoding error: flag all rows and bail out */
2419 if (more_data < 0) {
2420 s->tab_slice_address[ctb_addr_rs] = -1;
2421 atomic_store(&s1->wpp_err, 1);
2422 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2428 ff_hevc_save_states(s, ctb_addr_ts);
2429 ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2430 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
/* premature end of row data that is not the last entry point: error */
2432 if (!more_data && (x_ctb+ctb_size) < s->ps.sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2433 atomic_store(&s1->wpp_err, 1);
2434 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
/* last CTB of the picture: run the final filter pass */
2438 if ((x_ctb+ctb_size) >= s->ps.sps->width && (y_ctb+ctb_size) >= s->ps.sps->height ) {
2439 ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2440 ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2443 ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
/* reached the right picture edge: this row's work is done */
2446 if(x_ctb >= s->ps.sps->width) {
2450 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
/*
 * Wavefront (WPP) dispatcher: validate the entry-point table, set up one
 * HEVCContext/HEVCLocalContext copy per worker thread, translate the
 * slice-header entry_point_offset[] values into per-row byte offsets/sizes
 * (compensating for emulation-prevention bytes recorded in
 * nal->skipped_bytes_pos), then run hls_decode_entry_wpp over all rows via
 * avctx->execute2.
 * NOTE(review): several declarations (offset, i, j, res), else branches,
 * increments and closing braces are missing from this excerpt; allocation
 * results of the per-thread contexts are checked in lines not visible here
 * — confirm against the full file.
 */
2455 static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal)
2457 const uint8_t *data = nal->data;
2458 int length = nal->size;
2459 HEVCLocalContext *lc = s->HEVClc;
2460 int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2461 int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2463 int64_t startheader, cmpt = 0;
2469 return AVERROR(ENOMEM);
/* sanity check: all entry-point rows must fit inside the picture */
2472 if (s->sh.slice_ctb_addr_rs + s->sh.num_entry_point_offsets * s->ps.sps->ctb_width >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
2473 av_log(s->avctx, AV_LOG_ERROR, "WPP ctb addresses are wrong (%d %d %d %d)\n",
2474 s->sh.slice_ctb_addr_rs, s->sh.num_entry_point_offsets,
2475 s->ps.sps->ctb_width, s->ps.sps->ctb_height
2477 res = AVERROR_INVALIDDATA;
2481 ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
/* first-time setup of the per-thread decoder contexts */
2484 for (i = 1; i < s->threads_number; i++) {
2485 s->sList[i] = av_malloc(sizeof(HEVCContext));
2486 memcpy(s->sList[i], s, sizeof(HEVCContext));
2487 s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2488 s->sList[i]->HEVClc = s->HEVClcList[i];
/* byte position where the first row's data starts */
2492 offset = (lc->gb.index >> 3);
/* count emulation-prevention bytes before the first entry point */
2494 for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) {
2495 if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
/* per-row offset/size, each corrected by the skipped-byte count */
2501 for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2502 offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2503 for (j = 0, cmpt = 0, startheader = offset
2504 + s->sh.entry_point_offset[i]; j < nal->skipped_bytes; j++) {
2505 if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2510 s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2511 s->sh.offset[i - 1] = offset;
/* the last row extends to the end of the NAL unit */
2514 if (s->sh.num_entry_point_offsets != 0) {
2515 offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2516 if (length < offset) {
2517 av_log(s->avctx, AV_LOG_ERROR, "entry_point_offset table is corrupted\n");
2518 res = AVERROR_INVALIDDATA;
2521 s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2522 s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
/* refresh the worker contexts with the current main-context state */
2527 for (i = 1; i < s->threads_number; i++) {
2528 s->sList[i]->HEVClc->first_qp_group = 1;
2529 s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2530 memcpy(s->sList[i], s, sizeof(HEVCContext));
2531 s->sList[i]->HEVClc = s->HEVClcList[i];
2534 atomic_store(&s->wpp_err, 0);
2535 ff_reset_entries(s->avctx);
2537 for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2542 if (s->ps.pps->entropy_coding_sync_enabled_flag)
2543 s->avctx->execute2(s->avctx, hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2545 for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
/*
 * Attach SEI-derived side data to the output frame: stereo 3D frame
 * packing, display orientation (rotation/flip matrix), mastering display
 * colour volume metadata, and A53 closed captions.  Returns 0 or
 * AVERROR(ENOMEM).
 * NOTE(review): some case labels, else lines and closing braces are
 * missing from this excerpt.
 */
2553 static int set_side_data(HEVCContext *s)
2555 AVFrame *out = s->ref->frame;
/* frame-packing SEI: only arrangement types 3..5 with a meaningful
 * content_interpretation_type are exported */
2557 if (s->sei_frame_packing_present &&
2558 s->frame_packing_arrangement_type >= 3 &&
2559 s->frame_packing_arrangement_type <= 5 &&
2560 s->content_interpretation_type > 0 &&
2561 s->content_interpretation_type < 3) {
2562 AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2564 return AVERROR(ENOMEM);
2566 switch (s->frame_packing_arrangement_type) {
2568 if (s->quincunx_subsampling)
2569 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2571 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2574 stereo->type = AV_STEREO3D_TOPBOTTOM;
2577 stereo->type = AV_STEREO3D_FRAMESEQUENCE;
/* interpretation type 2 means the left/right views are swapped */
2581 if (s->content_interpretation_type == 2)
2582 stereo->flags = AV_STEREO3D_FLAG_INVERT;
/* display-orientation SEI: rotation is coded in 1/65536 turns */
2585 if (s->sei_display_orientation_present &&
2586 (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2587 double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2588 AVFrameSideData *rotation = av_frame_new_side_data(out,
2589 AV_FRAME_DATA_DISPLAYMATRIX,
2590 sizeof(int32_t) * 9);
2592 return AVERROR(ENOMEM);
2594 av_display_rotation_set((int32_t *)rotation->data, angle);
2595 av_display_matrix_flip((int32_t *)rotation->data,
2596 s->sei_hflip, s->sei_vflip);
2599 // Decrement the mastering display flag when IRAP frame has no_rasl_output_flag=1
2600 // so the side data persists for the entire coded video sequence.
2601 if (s->sei_mastering_display_info_present > 0 &&
2602 IS_IRAP(s) && s->no_rasl_output_flag) {
2603 s->sei_mastering_display_info_present--;
2605 if (s->sei_mastering_display_info_present) {
2606 // HEVC uses a g,b,r ordering, which we convert to a more natural r,g,b
2607 const int mapping[3] = {2, 0, 1};
2608 const int chroma_den = 50000;
2609 const int luma_den = 10000;
2611 AVMasteringDisplayMetadata *metadata =
2612 av_mastering_display_metadata_create_side_data(out);
2614 return AVERROR(ENOMEM);
2616 for (i = 0; i < 3; i++) {
2617 const int j = mapping[i];
2618 metadata->display_primaries[i][0].num = s->display_primaries[j][0];
2619 metadata->display_primaries[i][0].den = chroma_den;
2620 metadata->display_primaries[i][1].num = s->display_primaries[j][1];
2621 metadata->display_primaries[i][1].den = chroma_den;
2623 metadata->white_point[0].num = s->white_point[0];
2624 metadata->white_point[0].den = chroma_den;
2625 metadata->white_point[1].num = s->white_point[1];
2626 metadata->white_point[1].den = chroma_den;
2628 metadata->max_luminance.num = s->max_mastering_luminance;
2629 metadata->max_luminance.den = luma_den;
2630 metadata->min_luminance.num = s->min_mastering_luminance;
2631 metadata->min_luminance.den = luma_den;
2632 metadata->has_luminance = 1;
2633 metadata->has_primaries = 1;
2635 av_log(s->avctx, AV_LOG_DEBUG, "Mastering Display Metadata:\n");
2636 av_log(s->avctx, AV_LOG_DEBUG,
2637 "r(%5.4f,%5.4f) g(%5.4f,%5.4f) b(%5.4f %5.4f) wp(%5.4f, %5.4f)\n",
2638 av_q2d(metadata->display_primaries[0][0]),
2639 av_q2d(metadata->display_primaries[0][1]),
2640 av_q2d(metadata->display_primaries[1][0]),
2641 av_q2d(metadata->display_primaries[1][1]),
2642 av_q2d(metadata->display_primaries[2][0]),
2643 av_q2d(metadata->display_primaries[2][1]),
2644 av_q2d(metadata->white_point[0]), av_q2d(metadata->white_point[1]));
2645 av_log(s->avctx, AV_LOG_DEBUG,
2646 "min_luminance=%f, max_luminance=%f\n",
2647 av_q2d(metadata->min_luminance), av_q2d(metadata->max_luminance));
/* A53 captions: transfer ownership of the buffered caption data */
2650 if (s->a53_caption) {
2651 AVFrameSideData* sd = av_frame_new_side_data(out,
2652 AV_FRAME_DATA_A53_CC,
2653 s->a53_caption_size);
2655 memcpy(sd->data, s->a53_caption, s->a53_caption_size);
2656 av_freep(&s->a53_caption);
2657 s->a53_caption_size = 0;
2658 s->avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
/*
 * Per-frame initialization: clear the deblocking/CBF/PCM/slice-address
 * tables, allocate the new reference frame, build the frame RPS, attach
 * side data, and output any frame that becomes ready after bumping.
 * On failure the partially set up frame is unreferenced (fail path).
 * NOTE(review): error-checking lines and the return statements are missing
 * from this excerpt.
 */
2664 static int hevc_frame_start(HEVCContext *s)
2666 HEVCLocalContext *lc = s->HEVClc;
2667 int pic_size_in_ctb = ((s->ps.sps->width >> s->ps.sps->log2_min_cb_size) + 1) *
2668 ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
/* reset all per-picture maps */
2671 memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2672 memset(s->vertical_bs, 0, s->bs_width * s->bs_height);
2673 memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2674 memset(s->is_pcm, 0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
2675 memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2678 s->first_nal_type = s->nal_unit_type;
2680 if (s->ps.pps->tiles_enabled_flag)
2681 lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2683 ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2687 ret = ff_hevc_frame_rps(s);
2689 av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2693 s->ref->frame->key_frame = IS_IRAP(s);
2695 ret = set_side_data(s);
/* slice_type I/P/B (2/1/0) maps directly onto AVPictureType */
2699 s->frame->pict_type = 3 - s->sh.slice_type;
2702 ff_hevc_bump_frame(s);
2704 av_frame_unref(s->output_frame);
2705 ret = ff_hevc_output_frame(s, s->output_frame, 0);
/* with hwaccel the pixel format setup happens later, so only signal
 * setup completion here for software decoding */
2709 if (!s->avctx->hwaccel)
2710 ff_thread_finish_setup(s->avctx);
/* fail path: drop every reference the new frame holds */
2716 ff_hevc_unref_frame(s, s->ref, ~0);
/*
 * Dispatch a single NAL unit: parameter sets (VPS/SPS/PPS) and SEI go to
 * their parsers; VCL slice NALs get slice-header parsing, RASL/recovery
 * gating, frame start handling, reference-list construction and finally
 * slice-data decoding (WPP or single-threaded, or hwaccel).
 * NOTE(review): case labels, else branches, gotos and closing braces are
 * missing from this excerpt.
 */
2721 static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
2723 HEVCLocalContext *lc = s->HEVClc;
2724 GetBitContext *gb = &lc->gb;
2725 int ctb_addr_ts, ret;
2728 s->nal_unit_type = nal->type;
2729 s->temporal_id = nal->temporal_id;
2731 switch (s->nal_unit_type) {
2733 ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2738 ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2739 s->apply_defdispwin);
2744 ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2748 case HEVC_NAL_SEI_PREFIX:
2749 case HEVC_NAL_SEI_SUFFIX:
2750 ret = ff_hevc_decode_nal_sei(s);
/* all VCL slice NAL types share the handling below */
2754 case HEVC_NAL_TRAIL_R:
2755 case HEVC_NAL_TRAIL_N:
2756 case HEVC_NAL_TSA_N:
2757 case HEVC_NAL_TSA_R:
2758 case HEVC_NAL_STSA_N:
2759 case HEVC_NAL_STSA_R:
2760 case HEVC_NAL_BLA_W_LP:
2761 case HEVC_NAL_BLA_W_RADL:
2762 case HEVC_NAL_BLA_N_LP:
2763 case HEVC_NAL_IDR_W_RADL:
2764 case HEVC_NAL_IDR_N_LP:
2765 case HEVC_NAL_CRA_NUT:
2766 case HEVC_NAL_RADL_N:
2767 case HEVC_NAL_RADL_R:
2768 case HEVC_NAL_RASL_N:
2769 case HEVC_NAL_RASL_R:
2770 ret = hls_slice_header(s);
/* random-access gating: while seeking, decoding starts at the next
 * CRA/BLA; RASL pictures preceding it are skipped */
2774 if (s->max_ra == INT_MAX) {
2775 if (s->nal_unit_type == HEVC_NAL_CRA_NUT || IS_BLA(s)) {
2779 s->max_ra = INT_MIN;
2783 if ((s->nal_unit_type == HEVC_NAL_RASL_R || s->nal_unit_type == HEVC_NAL_RASL_N) &&
2784 s->poc <= s->max_ra) {
2788 if (s->nal_unit_type == HEVC_NAL_RASL_R && s->poc > s->max_ra)
2789 s->max_ra = INT_MIN;
2792 if (s->sh.first_slice_in_pic_flag) {
2793 ret = hevc_frame_start(s);
2796 } else if (!s->ref) {
2797 av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
/* all VCL NALs of one picture must have the same type */
2801 if (s->nal_unit_type != s->first_nal_type) {
2802 av_log(s->avctx, AV_LOG_ERROR,
2803 "Non-matching NAL types of the VCL NALUs: %d %d\n",
2804 s->first_nal_type, s->nal_unit_type);
2805 return AVERROR_INVALIDDATA;
2808 if (!s->sh.dependent_slice_segment_flag &&
2809 s->sh.slice_type != I_SLICE) {
2810 ret = ff_hevc_slice_rpl(s);
2812 av_log(s->avctx, AV_LOG_WARNING,
2813 "Error constructing the reference lists for the current slice.\n");
2818 if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2819 ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2824 if (s->avctx->hwaccel) {
2825 ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
/* software path: WPP when multiple threads and entry points exist */
2829 if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2830 ctb_addr_ts = hls_slice_data_wpp(s, nal);
2832 ctb_addr_ts = hls_slice_data(s);
2833 if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
2837 if (ctb_addr_ts < 0) {
2843 case HEVC_NAL_EOS_NUT:
2844 case HEVC_NAL_EOB_NUT:
/* end of sequence/bitstream: bump the sequence id, re-arm RA gating */
2845 s->seq_decode = (s->seq_decode + 1) & 0xff;
2846 s->max_ra = INT_MAX;
2849 case HEVC_NAL_FD_NUT:
2852 av_log(s->avctx, AV_LOG_INFO,
2853 "Skipping NAL unit %d\n", s->nal_unit_type);
/* errors are fatal only with AV_EF_EXPLODE; otherwise decoding continues */
2858 if (s->avctx->err_recognition & AV_EF_EXPLODE)
/*
 * Split the input packet into NAL units and decode them in order.
 * Per-NAL errors are warnings unless AV_EF_EXPLODE is set; on exit the
 * current frame's decoding progress is completed for frame threading.
 * NOTE(review): declarations, the EOS/EOB handling body and the fail label
 * are missing from this excerpt.
 */
2863 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2868 s->last_eos = s->eos;
2871 /* split the input packet into NAL units, so we know the upper bound on the
2872 * number of slices in the frame */
2873 ret = ff_h2645_packet_split(&s->pkt, buf, length, s->avctx, s->is_nalff,
2874 s->nal_length_size, s->avctx->codec_id, 1);
2876 av_log(s->avctx, AV_LOG_ERROR,
2877 "Error splitting the input into NAL units.\n");
/* scan for EOS/EOB before decoding so the eos state is set up front */
2881 for (i = 0; i < s->pkt.nb_nals; i++) {
2882 if (s->pkt.nals[i].type == HEVC_NAL_EOB_NUT ||
2883 s->pkt.nals[i].type == HEVC_NAL_EOS_NUT)
2887 /* decode the NAL units */
2888 for (i = 0; i < s->pkt.nb_nals; i++) {
2889 ret = decode_nal_unit(s, &s->pkt.nals[i]);
2891 av_log(s->avctx, AV_LOG_WARNING,
2892 "Error parsing NAL unit #%d.\n", i);
/* mark the frame fully decoded so frame-threaded consumers unblock */
2898 if (s->ref && s->threads_type == FF_THREAD_FRAME)
2899 ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
/* Log the 16-byte MD5 digest as lowercase hex at the given log level.
 * NOTE(review): the declaration of i (original line 2906) is not visible
 * in this excerpt. */
2904 static void print_md5(void *log_ctx, int level, uint8_t md5[16])
2907 for (i = 0; i < 16; i++)
2908 av_log(log_ctx, level, "%02"PRIx8, md5[i]);
/*
 * Verify the decoded frame against the MD5 checksums carried in the
 * picture-hash SEI (s->md5[plane]).  The SEI checksums are computed on
 * little-endian samples, so >8bpp planes are byteswapped into a scratch
 * buffer first.  Returns 0 on match, AVERROR_INVALIDDATA on mismatch.
 * NOTE(review): some declarations (i, j, pixel_shift, md5[]) and braces
 * are missing from this excerpt.
 */
2911 static int verify_md5(HEVCContext *s, AVFrame *frame)
2913 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2918 return AVERROR(EINVAL);
/* 1 when samples are wider than 8 bits (two bytes per sample) */
2920 pixel_shift = desc->comp[0].depth > 8;
2922 av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2925 /* the checksums are LE, so we have to byteswap for >8bpp formats
/* lazily allocate a scratch line big enough for the widest plane */
2928 if (pixel_shift && !s->checksum_buf) {
2929 av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2930 FFMAX3(frame->linesize[0], frame->linesize[1],
2931 frame->linesize[2]));
2932 if (!s->checksum_buf)
2933 return AVERROR(ENOMEM);
/* hash each plane line by line (chroma planes use subsampled size) */
2937 for (i = 0; frame->data[i]; i++) {
2938 int width = s->avctx->coded_width;
2939 int height = s->avctx->coded_height;
2940 int w = (i == 1 || i == 2) ? (width >> desc->log2_chroma_w) : width;
2941 int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2944 av_md5_init(s->md5_ctx);
2945 for (j = 0; j < h; j++) {
2946 const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2949 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2950 (const uint16_t *) src, w);
2951 src = s->checksum_buf;
2954 av_md5_update(s->md5_ctx, src, w << pixel_shift);
2956 av_md5_final(s->md5_ctx, md5);
2958 if (!memcmp(md5, s->md5[i], 16)) {
2959 av_log (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2960 print_md5(s->avctx, AV_LOG_DEBUG, md5);
2961 av_log (s->avctx, AV_LOG_DEBUG, "; ");
2963 av_log (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2964 print_md5(s->avctx, AV_LOG_ERROR, md5);
2965 av_log (s->avctx, AV_LOG_ERROR, " != ");
2966 print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2967 av_log (s->avctx, AV_LOG_ERROR, "\n");
2968 return AVERROR_INVALIDDATA;
2972 av_log(s->avctx, AV_LOG_DEBUG, "\n");
2977 static int hevc_decode_extradata(HEVCContext *s, uint8_t *buf, int length)
2979 AVCodecContext *avctx = s->avctx;
2983 bytestream2_init(&gb, buf, length);
2985 if (length > 3 && (buf[0] || buf[1] || buf[2] > 1)) {
2986 /* It seems the extradata is encoded as hvcC format.
2987 * Temporarily, we support configurationVersion==0 until 14496-15 3rd
2988 * is finalized. When finalized, configurationVersion will be 1 and we
2989 * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
2990 int i, j, num_arrays, nal_len_size;
2994 bytestream2_skip(&gb, 21);
2995 nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
2996 num_arrays = bytestream2_get_byte(&gb);
2998 /* nal units in the hvcC always have length coded with 2 bytes,
2999 * so put a fake nal_length_size = 2 while parsing them */
3000 s->nal_length_size = 2;
3002 /* Decode nal units from hvcC. */
3003 for (i = 0; i < num_arrays; i++) {
3004 int type = bytestream2_get_byte(&gb) & 0x3f;
3005 int cnt = bytestream2_get_be16(&gb);
3007 for (j = 0; j < cnt; j++) {
3008 // +2 for the nal size field
3009 int nalsize = bytestream2_peek_be16(&gb) + 2;
3010 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3011 av_log(s->avctx, AV_LOG_ERROR,
3012 "Invalid NAL unit size in extradata.\n");
3013 return AVERROR_INVALIDDATA;
3016 ret = decode_nal_units(s, gb.buffer, nalsize);
3018 av_log(avctx, AV_LOG_ERROR,
3019 "Decoding nal unit %d %d from hvcC failed\n",
3023 bytestream2_skip(&gb, nalsize);
3027 /* Now store right nal length size, that will be used to parse
3029 s->nal_length_size = nal_len_size;
3032 ret = decode_nal_units(s, buf, length);
3037 /* export stream parameters from the first SPS */
3038 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3039 if (s->ps.sps_list[i]) {
3040 const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
3041 export_stream_params(s->avctx, &s->ps, sps);
3049 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
3053 int new_extradata_size;
3054 uint8_t *new_extradata;
3055 HEVCContext *s = avctx->priv_data;
3058 ret = ff_hevc_output_frame(s, data, 1);
3066 new_extradata = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA,
3067 &new_extradata_size);
3068 if (new_extradata && new_extradata_size > 0) {
3069 ret = hevc_decode_extradata(s, new_extradata, new_extradata_size);
3075 ret = decode_nal_units(s, avpkt->data, avpkt->size);
3079 if (avctx->hwaccel) {
3080 if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) {
3081 av_log(avctx, AV_LOG_ERROR,
3082 "hardware accelerator failed to decode picture\n");
3083 ff_hevc_unref_frame(s, s->ref, ~0);
3087 /* verify the SEI checksum */
3088 if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
3090 ret = verify_md5(s, s->ref->frame);
3091 if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
3092 ff_hevc_unref_frame(s, s->ref, ~0);
3099 if (s->is_decoded) {
3100 av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
3104 if (s->output_frame->buf[0]) {
3105 av_frame_move_ref(data, s->output_frame);
3112 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
3116 ret = ff_thread_ref_frame(&dst->tf, &src->tf);
3120 dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
3121 if (!dst->tab_mvf_buf)
3123 dst->tab_mvf = src->tab_mvf;
3125 dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
3126 if (!dst->rpl_tab_buf)
3128 dst->rpl_tab = src->rpl_tab;
3130 dst->rpl_buf = av_buffer_ref(src->rpl_buf);
3134 dst->poc = src->poc;
3135 dst->ctb_count = src->ctb_count;
3136 dst->window = src->window;
3137 dst->flags = src->flags;
3138 dst->sequence = src->sequence;
3140 if (src->hwaccel_picture_private) {
3141 dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
3142 if (!dst->hwaccel_priv_buf)
3144 dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
3149 ff_hevc_unref_frame(s, dst, ~0);
3150 return AVERROR(ENOMEM);
3153 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3155 HEVCContext *s = avctx->priv_data;
3160 av_freep(&s->md5_ctx);
3162 av_freep(&s->cabac_state);
3164 for (i = 0; i < 3; i++) {
3165 av_freep(&s->sao_pixel_buffer_h[i]);
3166 av_freep(&s->sao_pixel_buffer_v[i]);
3168 av_frame_free(&s->output_frame);
3170 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3171 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3172 av_frame_free(&s->DPB[i].frame);
3175 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++)
3176 av_buffer_unref(&s->ps.vps_list[i]);
3177 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++)
3178 av_buffer_unref(&s->ps.sps_list[i]);
3179 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++)
3180 av_buffer_unref(&s->ps.pps_list[i]);
3185 av_freep(&s->sh.entry_point_offset);
3186 av_freep(&s->sh.offset);
3187 av_freep(&s->sh.size);
3189 for (i = 1; i < s->threads_number; i++) {
3190 HEVCLocalContext *lc = s->HEVClcList[i];
3192 av_freep(&s->HEVClcList[i]);
3193 av_freep(&s->sList[i]);
3196 if (s->HEVClc == s->HEVClcList[0])
3198 av_freep(&s->HEVClcList[0]);
3200 ff_h2645_packet_uninit(&s->pkt);
3205 static av_cold int hevc_init_context(AVCodecContext *avctx)
3207 HEVCContext *s = avctx->priv_data;
3212 s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3215 s->HEVClcList[0] = s->HEVClc;
3218 s->cabac_state = av_malloc(HEVC_CONTEXTS);
3219 if (!s->cabac_state)
3222 s->output_frame = av_frame_alloc();
3223 if (!s->output_frame)
3226 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3227 s->DPB[i].frame = av_frame_alloc();
3228 if (!s->DPB[i].frame)
3230 s->DPB[i].tf.f = s->DPB[i].frame;
3233 s->max_ra = INT_MAX;
3235 s->md5_ctx = av_md5_alloc();
3239 ff_bswapdsp_init(&s->bdsp);
3241 s->context_initialized = 1;
3244 ff_hevc_reset_sei(s);
3249 hevc_decode_free(avctx);
3250 return AVERROR(ENOMEM);
3253 static int hevc_update_thread_context(AVCodecContext *dst,
3254 const AVCodecContext *src)
3256 HEVCContext *s = dst->priv_data;
3257 HEVCContext *s0 = src->priv_data;
3260 if (!s->context_initialized) {
3261 ret = hevc_init_context(dst);
3266 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3267 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3268 if (s0->DPB[i].frame->buf[0]) {
3269 ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3275 if (s->ps.sps != s0->ps.sps)
3277 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
3278 av_buffer_unref(&s->ps.vps_list[i]);
3279 if (s0->ps.vps_list[i]) {
3280 s->ps.vps_list[i] = av_buffer_ref(s0->ps.vps_list[i]);
3281 if (!s->ps.vps_list[i])
3282 return AVERROR(ENOMEM);
3286 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3287 av_buffer_unref(&s->ps.sps_list[i]);
3288 if (s0->ps.sps_list[i]) {
3289 s->ps.sps_list[i] = av_buffer_ref(s0->ps.sps_list[i]);
3290 if (!s->ps.sps_list[i])
3291 return AVERROR(ENOMEM);
3295 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
3296 av_buffer_unref(&s->ps.pps_list[i]);
3297 if (s0->ps.pps_list[i]) {
3298 s->ps.pps_list[i] = av_buffer_ref(s0->ps.pps_list[i]);
3299 if (!s->ps.pps_list[i])
3300 return AVERROR(ENOMEM);
3304 if (s->ps.sps != s0->ps.sps)
3305 if ((ret = set_sps(s, s0->ps.sps, src->pix_fmt)) < 0)
3308 s->seq_decode = s0->seq_decode;
3309 s->seq_output = s0->seq_output;
3310 s->pocTid0 = s0->pocTid0;
3311 s->max_ra = s0->max_ra;
3313 s->no_rasl_output_flag = s0->no_rasl_output_flag;
3315 s->is_nalff = s0->is_nalff;
3316 s->nal_length_size = s0->nal_length_size;
3318 s->threads_number = s0->threads_number;
3319 s->threads_type = s0->threads_type;
3322 s->seq_decode = (s->seq_decode + 1) & 0xff;
3323 s->max_ra = INT_MAX;
3329 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3331 HEVCContext *s = avctx->priv_data;
3334 avctx->internal->allocate_progress = 1;
3336 ret = hevc_init_context(avctx);
3340 s->enable_parallel_tiles = 0;
3341 s->picture_struct = 0;
3344 atomic_init(&s->wpp_err, 0);
3346 if(avctx->active_thread_type & FF_THREAD_SLICE)
3347 s->threads_number = avctx->thread_count;
3349 s->threads_number = 1;
3351 if (avctx->extradata_size > 0 && avctx->extradata) {
3352 ret = hevc_decode_extradata(s, avctx->extradata, avctx->extradata_size);
3354 hevc_decode_free(avctx);
3359 if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3360 s->threads_type = FF_THREAD_FRAME;
3362 s->threads_type = FF_THREAD_SLICE;
3367 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3369 HEVCContext *s = avctx->priv_data;
3372 memset(s, 0, sizeof(*s));
3374 ret = hevc_init_context(avctx);
3381 static void hevc_decode_flush(AVCodecContext *avctx)
3383 HEVCContext *s = avctx->priv_data;
3384 ff_hevc_flush_dpb(s);
3385 s->max_ra = INT_MAX;
3389 #define OFFSET(x) offsetof(HEVCContext, x)
3390 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3392 static const AVOption options[] = {
3393 { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3394 AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3395 { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3396 AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3400 static const AVClass hevc_decoder_class = {
3401 .class_name = "HEVC decoder",
3402 .item_name = av_default_item_name,
3404 .version = LIBAVUTIL_VERSION_INT,
3407 AVCodec ff_hevc_decoder = {
3409 .long_name = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3410 .type = AVMEDIA_TYPE_VIDEO,
3411 .id = AV_CODEC_ID_HEVC,
3412 .priv_data_size = sizeof(HEVCContext),
3413 .priv_class = &hevc_decoder_class,
3414 .init = hevc_decode_init,
3415 .close = hevc_decode_free,
3416 .decode = hevc_decode_frame,
3417 .flush = hevc_decode_flush,
3418 .update_thread_context = hevc_update_thread_context,
3419 .init_thread_copy = hevc_init_thread_copy,
3420 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
3421 AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
3422 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE,
3423 .profiles = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),