git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c

   1 /*
   2  * HEVC video Decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of FFmpeg.
  10  *
  11  * FFmpeg is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * FFmpeg is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with FFmpeg; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/atomic.h"
  27 #include "libavutil/attributes.h"
  28 #include "libavutil/common.h"
  29 #include "libavutil/display.h"
  30 #include "libavutil/internal.h"
  31 #include "libavutil/md5.h"
  32 #include "libavutil/opt.h"
  33 #include "libavutil/pixdesc.h"
  34 #include "libavutil/stereo3d.h"
  35
  36 #include "bswapdsp.h"
  37 #include "bytestream.h"
  38 #include "cabac_functions.h"
  39 #include "golomb.h"
  40 #include "hevc.h"
  41
  42 const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
  43
  44 /**
  45  * NOTE: Each function hls_foo corresponds to the function foo in the
  46  * specification (HLS stands for High Level Syntax).
  47  */
  48
  49 /**
  50  * Section 5.7
  51  */
  52
  53 /* free everything allocated  by pic_arrays_init() */
  54 static void pic_arrays_free(HEVCContext *s)
  55 {
  56     av_freep(&s->sao);
  57     av_freep(&s->deblock);
  58
  59     av_freep(&s->skip_flag);
  60     av_freep(&s->tab_ct_depth);
  61
  62     av_freep(&s->tab_ipm);
  63     av_freep(&s->cbf_luma);
  64     av_freep(&s->is_pcm);
  65
  66     av_freep(&s->qp_y_tab);
  67     av_freep(&s->tab_slice_address);
  68     av_freep(&s->filter_slice_edges);
  69
  70     av_freep(&s->horizontal_bs);
  71     av_freep(&s->vertical_bs);
  72
  73     av_freep(&s->sh.entry_point_offset);
  74     av_freep(&s->sh.size);
  75     av_freep(&s->sh.offset);
  76
  77     av_buffer_pool_uninit(&s->tab_mvf_pool);
  78     av_buffer_pool_uninit(&s->rpl_tab_pool);
  79 }
  80
  81 /* allocate arrays that depend on frame dimensions */
  82 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
  83 {
  84     int log2_min_cb_size = sps->log2_min_cb_size;
  85     int width            = sps->width;
  86     int height           = sps->height;
  87     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
  88                            ((height >> log2_min_cb_size) + 1);
  89     int ctb_count        = sps->ctb_width * sps->ctb_height;
  90     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
  91
  92     s->bs_width  = (width  >> 2) + 1;
  93     s->bs_height = (height >> 2) + 1;
  94
  95     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
  96     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
  97     if (!s->sao || !s->deblock)
  98         goto fail;
  99
 100     s->skip_flag    = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
 101     s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
 102     if (!s->skip_flag || !s->tab_ct_depth)
 103         goto fail;
 104
 105     s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
 106     s->tab_ipm  = av_mallocz(min_pu_size);
 107     s->is_pcm   = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
 108     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 109         goto fail;
 110
 111     s->filter_slice_edges = av_mallocz(ctb_count);
 112     s->tab_slice_address  = av_malloc_array(pic_size_in_ctb,
 113                                       sizeof(*s->tab_slice_address));
 114     s->qp_y_tab           = av_malloc_array(pic_size_in_ctb,
 115                                       sizeof(*s->qp_y_tab));
 116     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 117         goto fail;
 118
 119     s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
 120     s->vertical_bs   = av_mallocz_array(s->bs_width, s->bs_height);
 121     if (!s->horizontal_bs || !s->vertical_bs)
 122         goto fail;
 123
 124     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 125                                           av_buffer_allocz);
 126     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 127                                           av_buffer_allocz);
 128     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 129         goto fail;
 130
 131     return 0;
 132
 133 fail:
 134     pic_arrays_free(s);
 135     return AVERROR(ENOMEM);
 136 }
 137
 138 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 139 {
 140     int i = 0;
 141     int j = 0;
 142     uint8_t luma_weight_l0_flag[16];
 143     uint8_t chroma_weight_l0_flag[16];
 144     uint8_t luma_weight_l1_flag[16];
 145     uint8_t chroma_weight_l1_flag[16];
 146     int luma_log2_weight_denom;
 147
 148     luma_log2_weight_denom = get_ue_golomb_long(gb);
 149     if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7)
 150         av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
 151     s->sh.luma_log2_weight_denom = av_clip_c(luma_log2_weight_denom, 0, 7);
 152     if (s->sps->chroma_format_idc != 0) {
 153         int delta = get_se_golomb(gb);
 154         s->sh.chroma_log2_weight_denom = av_clip(s->sh.luma_log2_weight_denom + delta, 0, 7);
 155     }
 156
 157     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 158         luma_weight_l0_flag[i] = get_bits1(gb);
 159         if (!luma_weight_l0_flag[i]) {
 160             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 161             s->sh.luma_offset_l0[i] = 0;
 162         }
 163     }
 164     if (s->sps->chroma_format_idc != 0) {
 165         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 166             chroma_weight_l0_flag[i] = get_bits1(gb);
 167     } else {
 168         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 169             chroma_weight_l0_flag[i] = 0;
 170     }
 171     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 172         if (luma_weight_l0_flag[i]) {
 173             int delta_luma_weight_l0 = get_se_golomb(gb);
 174             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 175             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 176         }
 177         if (chroma_weight_l0_flag[i]) {
 178             for (j = 0; j < 2; j++) {
 179                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 180                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 181                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 182                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 183                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 184             }
 185         } else {
 186             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 187             s->sh.chroma_offset_l0[i][0] = 0;
 188             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 189             s->sh.chroma_offset_l0[i][1] = 0;
 190         }
 191     }
 192     if (s->sh.slice_type == B_SLICE) {
 193         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 194             luma_weight_l1_flag[i] = get_bits1(gb);
 195             if (!luma_weight_l1_flag[i]) {
 196                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 197                 s->sh.luma_offset_l1[i] = 0;
 198             }
 199         }
 200         if (s->sps->chroma_format_idc != 0) {
 201             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 202                 chroma_weight_l1_flag[i] = get_bits1(gb);
 203         } else {
 204             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 205                 chroma_weight_l1_flag[i] = 0;
 206         }
 207         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 208             if (luma_weight_l1_flag[i]) {
 209                 int delta_luma_weight_l1 = get_se_golomb(gb);
 210                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 211                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 212             }
 213             if (chroma_weight_l1_flag[i]) {
 214                 for (j = 0; j < 2; j++) {
 215                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 216                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 217                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 218                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 219                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 220                 }
 221             } else {
 222                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 223                 s->sh.chroma_offset_l1[i][0] = 0;
 224                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 225                 s->sh.chroma_offset_l1[i][1] = 0;
 226             }
 227         }
 228     }
 229 }
 230
 231 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 232 {
 233     const HEVCSPS *sps = s->sps;
 234     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 235     int prev_delta_msb = 0;
 236     unsigned int nb_sps = 0, nb_sh;
 237     int i;
 238
 239     rps->nb_refs = 0;
 240     if (!sps->long_term_ref_pics_present_flag)
 241         return 0;
 242
 243     if (sps->num_long_term_ref_pics_sps > 0)
 244         nb_sps = get_ue_golomb_long(gb);
 245     nb_sh = get_ue_golomb_long(gb);
 246
 247     if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
 248         return AVERROR_INVALIDDATA;
 249
 250     rps->nb_refs = nb_sh + nb_sps;
 251
 252     for (i = 0; i < rps->nb_refs; i++) {
 253         uint8_t delta_poc_msb_present;
 254
 255         if (i < nb_sps) {
 256             uint8_t lt_idx_sps = 0;
 257
 258             if (sps->num_long_term_ref_pics_sps > 1)
 259                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 260
 261             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 262             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 263         } else {
 264             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 265             rps->used[i] = get_bits1(gb);
 266         }
 267
 268         delta_poc_msb_present = get_bits1(gb);
 269         if (delta_poc_msb_present) {
 270             int delta = get_ue_golomb_long(gb);
 271
 272             if (i && i != nb_sps)
 273                 delta += prev_delta_msb;
 274
 275             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 276             prev_delta_msb = delta;
 277         }
 278     }
 279
 280     return 0;
 281 }
 282
 283 static int get_buffer_sao(HEVCContext *s, AVFrame *frame, const HEVCSPS *sps)
 284 {
 285     int ret, i;
 286
 287     frame->width  = FFALIGN(s->avctx->coded_width + 2, FF_INPUT_BUFFER_PADDING_SIZE);
 288     frame->height = s->avctx->coded_height + 3;
 289     if ((ret = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
 290         return ret;
 291     for (i = 0; frame->data[i]; i++) {
 292         int offset = frame->linesize[i] + FF_INPUT_BUFFER_PADDING_SIZE;
 293         frame->data[i] += offset;
 294     }
 295     frame->width  = s->avctx->coded_width;
 296     frame->height = s->avctx->coded_height;
 297
 298     return 0;
 299 }
 300
 301 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
 302 {
 303     #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL)
 304     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
 305     int ret;
 306     unsigned int num = 0, den = 0;
 307
 308     pic_arrays_free(s);
 309     ret = pic_arrays_init(s, sps);
 310     if (ret < 0)
 311         goto fail;
 312
 313     s->avctx->coded_width         = sps->width;
 314     s->avctx->coded_height        = sps->height;
 315     s->avctx->width               = sps->output_width;
 316     s->avctx->height              = sps->output_height;
 317     s->avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 318
 319     if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
 320 #if CONFIG_HEVC_DXVA2_HWACCEL
 321         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
 322 #endif
 323     }
 324
 325     *fmt++ = sps->pix_fmt;
 326     *fmt = AV_PIX_FMT_NONE;
 327
 328     ret = ff_thread_get_format(s->avctx, pix_fmts);
 329     if (ret < 0)
 330         goto fail;
 331     s->avctx->pix_fmt = ret;
 332
 333     ff_set_sar(s->avctx, sps->vui.sar);
 334
 335     if (sps->vui.video_signal_type_present_flag)
 336         s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 337                                                                : AVCOL_RANGE_MPEG;
 338     else
 339         s->avctx->color_range = AVCOL_RANGE_MPEG;
 340
 341     if (sps->vui.colour_description_present_flag) {
 342         s->avctx->color_primaries = sps->vui.colour_primaries;
 343         s->avctx->color_trc       = sps->vui.transfer_characteristic;
 344         s->avctx->colorspace      = sps->vui.matrix_coeffs;
 345     } else {
 346         s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 347         s->avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 348         s->avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 349     }
 350
 351     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 352     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 353     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 354
 355     if (sps->sao_enabled && !s->avctx->hwaccel) {
 356 #ifdef USE_SAO_SMALL_BUFFER
 357         {
 358             int ctb_size = 1 << sps->log2_ctb_size;
 359             int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
 360             int c_idx, i;
 361
 362             for (i = 0; i < s->threads_number ; i++) {
 363                 HEVCLocalContext    *lc = s->HEVClcList[i];
 364                 lc->sao_pixel_buffer =
 365                     av_malloc(((ctb_size + 2) * (ctb_size + 2)) <<
 366                               sps->pixel_shift);
 367             }
 368             for(c_idx = 0; c_idx < c_count; c_idx++) {
 369                 int w = sps->width >> sps->hshift[c_idx];
 370                 int h = sps->height >> sps->vshift[c_idx];
 371                 s->sao_pixel_buffer_h[c_idx] =
 372                 av_malloc((w * 2 * sps->ctb_height) <<
 373                           sps->pixel_shift);
 374                 s->sao_pixel_buffer_v[c_idx] =
 375                 av_malloc((h * 2 * sps->ctb_width) <<
 376                           sps->pixel_shift);
 377             }
 378         }
 379 #else
 380         av_frame_unref(s->tmp_frame);
 381         ret = get_buffer_sao(s, s->tmp_frame, sps);
 382         s->sao_frame = s->tmp_frame;
 383 #endif
 384     }
 385
 386     s->sps = sps;
 387     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
 388
 389     if (s->vps->vps_timing_info_present_flag) {
 390         num = s->vps->vps_num_units_in_tick;
 391         den = s->vps->vps_time_scale;
 392     } else if (sps->vui.vui_timing_info_present_flag) {
 393         num = sps->vui.vui_num_units_in_tick;
 394         den = sps->vui.vui_time_scale;
 395     }
 396
 397     if (num != 0 && den != 0)
 398         av_reduce(&s->avctx->framerate.den, &s->avctx->framerate.num,
 399                   num, den, 1 << 30);
 400
 401     return 0;
 402
 403 fail:
 404     pic_arrays_free(s);
 405     s->sps = NULL;
 406     return ret;
 407 }
 408
 409 static int hls_slice_header(HEVCContext *s)
 410 {
 411     GetBitContext *gb = &s->HEVClc->gb;
 412     SliceHeader *sh   = &s->sh;
 413     int i, j, ret;
 414
 415     // Coded parameters
 416     sh->first_slice_in_pic_flag = get_bits1(gb);
 417     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 418         s->seq_decode = (s->seq_decode + 1) & 0xff;
 419         s->max_ra     = INT_MAX;
 420         if (IS_IDR(s))
 421             ff_hevc_clear_refs(s);
 422     }
 423     sh->no_output_of_prior_pics_flag = 0;
 424     if (IS_IRAP(s))
 425         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 426
 427     sh->pps_id = get_ue_golomb_long(gb);
 428     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
 429         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 430         return AVERROR_INVALIDDATA;
 431     }
 432     if (!sh->first_slice_in_pic_flag &&
 433         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
 434         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 435         return AVERROR_INVALIDDATA;
 436     }
 437     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
 438     if (s->nal_unit_type == NAL_CRA_NUT && s->last_eos == 1)
 439         sh->no_output_of_prior_pics_flag = 1;
 440
 441     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
 442         const HEVCSPS* last_sps = s->sps;
 443         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
 444         if (last_sps && IS_IRAP(s) && s->nal_unit_type != NAL_CRA_NUT) {
 445             if (s->sps->width !=  last_sps->width || s->sps->height != last_sps->height ||
 446                 s->sps->temporal_layer[s->sps->max_sub_layers - 1].max_dec_pic_buffering !=
 447                 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
 448                 sh->no_output_of_prior_pics_flag = 0;
 449         }
 450         ff_hevc_clear_refs(s);
 451         ret = set_sps(s, s->sps);
 452         if (ret < 0)
 453             return ret;
 454
 455         s->seq_decode = (s->seq_decode + 1) & 0xff;
 456         s->max_ra     = INT_MAX;
 457     }
 458
 459     s->avctx->profile = s->sps->ptl.general_ptl.profile_idc;
 460     s->avctx->level   = s->sps->ptl.general_ptl.level_idc;
 461
 462     sh->dependent_slice_segment_flag = 0;
 463     if (!sh->first_slice_in_pic_flag) {
 464         int slice_address_length;
 465
 466         if (s->pps->dependent_slice_segments_enabled_flag)
 467             sh->dependent_slice_segment_flag = get_bits1(gb);
 468
 469         slice_address_length = av_ceil_log2(s->sps->ctb_width *
 470                                             s->sps->ctb_height);
 471         sh->slice_segment_addr = get_bits(gb, slice_address_length);
 472         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
 473             av_log(s->avctx, AV_LOG_ERROR,
 474                    "Invalid slice segment address: %u.\n",
 475                    sh->slice_segment_addr);
 476             return AVERROR_INVALIDDATA;
 477         }
 478
 479         if (!sh->dependent_slice_segment_flag) {
 480             sh->slice_addr = sh->slice_segment_addr;
 481             s->slice_idx++;
 482         }
 483     } else {
 484         sh->slice_segment_addr = sh->slice_addr = 0;
 485         s->slice_idx           = 0;
 486         s->slice_initialized   = 0;
 487     }
 488
 489     if (!sh->dependent_slice_segment_flag) {
 490         s->slice_initialized = 0;
 491
 492         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
 493             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 494
 495         sh->slice_type = get_ue_golomb_long(gb);
 496         if (!(sh->slice_type == I_SLICE ||
 497               sh->slice_type == P_SLICE ||
 498               sh->slice_type == B_SLICE)) {
 499             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 500                    sh->slice_type);
 501             return AVERROR_INVALIDDATA;
 502         }
 503         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
 504             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 505             return AVERROR_INVALIDDATA;
 506         }
 507
 508         // when flag is not present, picture is inferred to be output
 509         sh->pic_output_flag = 1;
 510         if (s->pps->output_flag_present_flag)
 511             sh->pic_output_flag = get_bits1(gb);
 512
 513         if (s->sps->separate_colour_plane_flag)
 514             sh->colour_plane_id = get_bits(gb, 2);
 515
 516         if (!IS_IDR(s)) {
 517             int poc;
 518
 519             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
 520             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
 521             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 522                 av_log(s->avctx, AV_LOG_WARNING,
 523                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 524                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 525                     return AVERROR_INVALIDDATA;
 526                 poc = s->poc;
 527             }
 528             s->poc = poc;
 529
 530             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
 531             if (!sh->short_term_ref_pic_set_sps_flag) {
 532                 int pos = get_bits_left(gb);
 533                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
 534                 if (ret < 0)
 535                     return ret;
 536
 537                 sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
 538                 sh->short_term_rps = &sh->slice_rps;
 539             } else {
 540                 int numbits, rps_idx;
 541
 542                 if (!s->sps->nb_st_rps) {
 543                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 544                     return AVERROR_INVALIDDATA;
 545                 }
 546
 547                 numbits = av_ceil_log2(s->sps->nb_st_rps);
 548                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 549                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
 550             }
 551
 552             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 553             if (ret < 0) {
 554                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 555                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 556                     return AVERROR_INVALIDDATA;
 557             }
 558
 559             if (s->sps->sps_temporal_mvp_enabled_flag)
 560                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 561             else
 562                 sh->slice_temporal_mvp_enabled_flag = 0;
 563         } else {
 564             s->sh.short_term_rps = NULL;
 565             s->poc               = 0;
 566         }
 567
 568         /* 8.3.1 */
 569         if (s->temporal_id == 0 &&
 570             s->nal_unit_type != NAL_TRAIL_N &&
 571             s->nal_unit_type != NAL_TSA_N   &&
 572             s->nal_unit_type != NAL_STSA_N  &&
 573             s->nal_unit_type != NAL_RADL_N  &&
 574             s->nal_unit_type != NAL_RADL_R  &&
 575             s->nal_unit_type != NAL_RASL_N  &&
 576             s->nal_unit_type != NAL_RASL_R)
 577             s->pocTid0 = s->poc;
 578
 579         if (s->sps->sao_enabled) {
 580             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 581             if (s->sps->chroma_format_idc) {
 582                 sh->slice_sample_adaptive_offset_flag[1] =
 583                 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 584             }
 585         } else {
 586             sh->slice_sample_adaptive_offset_flag[0] = 0;
 587             sh->slice_sample_adaptive_offset_flag[1] = 0;
 588             sh->slice_sample_adaptive_offset_flag[2] = 0;
 589         }
 590
 591         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 592         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
 593             int nb_refs;
 594
 595             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
 596             if (sh->slice_type == B_SLICE)
 597                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
 598
 599             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 600                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 601                 if (sh->slice_type == B_SLICE)
 602                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 603             }
 604             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
 605                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 606                        sh->nb_refs[L0], sh->nb_refs[L1]);
 607                 return AVERROR_INVALIDDATA;
 608             }
 609
 610             sh->rpl_modification_flag[0] = 0;
 611             sh->rpl_modification_flag[1] = 0;
 612             nb_refs = ff_hevc_frame_nb_refs(s);
 613             if (!nb_refs) {
 614                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 615                 return AVERROR_INVALIDDATA;
 616             }
 617
 618             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
 619                 sh->rpl_modification_flag[0] = get_bits1(gb);
 620                 if (sh->rpl_modification_flag[0]) {
 621                     for (i = 0; i < sh->nb_refs[L0]; i++)
 622                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 623                 }
 624
 625                 if (sh->slice_type == B_SLICE) {
 626                     sh->rpl_modification_flag[1] = get_bits1(gb);
 627                     if (sh->rpl_modification_flag[1] == 1)
 628                         for (i = 0; i < sh->nb_refs[L1]; i++)
 629                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 630                 }
 631             }
 632
 633             if (sh->slice_type == B_SLICE)
 634                 sh->mvd_l1_zero_flag = get_bits1(gb);
 635
 636             if (s->pps->cabac_init_present_flag)
 637                 sh->cabac_init_flag = get_bits1(gb);
 638             else
 639                 sh->cabac_init_flag = 0;
 640
 641             sh->collocated_ref_idx = 0;
 642             if (sh->slice_temporal_mvp_enabled_flag) {
 643                 sh->collocated_list = L0;
 644                 if (sh->slice_type == B_SLICE)
 645                     sh->collocated_list = !get_bits1(gb);
 646
 647                 if (sh->nb_refs[sh->collocated_list] > 1) {
 648                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 649                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 650                         av_log(s->avctx, AV_LOG_ERROR,
 651                                "Invalid collocated_ref_idx: %d.\n",
 652                                sh->collocated_ref_idx);
 653                         return AVERROR_INVALIDDATA;
 654                     }
 655                 }
 656             }
 657
 658             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
 659                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
 660                 pred_weight_table(s, gb);
 661             }
 662
 663             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 664             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 665                 av_log(s->avctx, AV_LOG_ERROR,
 666                        "Invalid number of merging MVP candidates: %d.\n",
 667                        sh->max_num_merge_cand);
 668                 return AVERROR_INVALIDDATA;
 669             }
 670         }
 671
 672         sh->slice_qp_delta = get_se_golomb(gb);
 673
 674         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 675             sh->slice_cb_qp_offset = get_se_golomb(gb);
 676             sh->slice_cr_qp_offset = get_se_golomb(gb);
 677         } else {
 678             sh->slice_cb_qp_offset = 0;
 679             sh->slice_cr_qp_offset = 0;
 680         }
 681
 682         if (s->pps->chroma_qp_offset_list_enabled_flag)
 683             sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
 684         else
 685             sh->cu_chroma_qp_offset_enabled_flag = 0;
 686
 687         if (s->pps->deblocking_filter_control_present_flag) {
 688             int deblocking_filter_override_flag = 0;
 689
 690             if (s->pps->deblocking_filter_override_enabled_flag)
 691                 deblocking_filter_override_flag = get_bits1(gb);
 692
 693             if (deblocking_filter_override_flag) {
 694                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 695                 if (!sh->disable_deblocking_filter_flag) {
 696                     sh->beta_offset = get_se_golomb(gb) * 2;
 697                     sh->tc_offset   = get_se_golomb(gb) * 2;
 698                 }
 699             } else {
 700                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
 701                 sh->beta_offset                    = s->pps->beta_offset;
 702                 sh->tc_offset                      = s->pps->tc_offset;
 703             }
 704         } else {
 705             sh->disable_deblocking_filter_flag = 0;
 706             sh->beta_offset                    = 0;
 707             sh->tc_offset                      = 0;
 708         }
 709
 710         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
 711             (sh->slice_sample_adaptive_offset_flag[0] ||
 712              sh->slice_sample_adaptive_offset_flag[1] ||
 713              !sh->disable_deblocking_filter_flag)) {
 714             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 715         } else {
 716             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
 717         }
 718     } else if (!s->slice_initialized) {
 719         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 720         return AVERROR_INVALIDDATA;
 721     }
 722
 723     sh->num_entry_point_offsets = 0;
 724     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
 725         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
 726         if (sh->num_entry_point_offsets > 0) {
 727             int offset_len = get_ue_golomb_long(gb) + 1;
 728             int segments = offset_len >> 4;
 729             int rest = (offset_len & 15);
 730             av_freep(&sh->entry_point_offset);
 731             av_freep(&sh->offset);
 732             av_freep(&sh->size);
 733             sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 734             sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 735             sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 736             if (!sh->entry_point_offset || !sh->offset || !sh->size) {
 737                 sh->num_entry_point_offsets = 0;
 738                 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
 739                 return AVERROR(ENOMEM);
 740             }
 741             for (i = 0; i < sh->num_entry_point_offsets; i++) {
 742                 int val = 0;
 743                 for (j = 0; j < segments; j++) {
 744                     val <<= 16;
 745                     val += get_bits(gb, 16);
 746                 }
 747                 if (rest) {
 748                     val <<= rest;
 749                     val += get_bits(gb, rest);
 750                 }
 751                 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
 752             }
 753             if (s->threads_number > 1 && (s->pps->num_tile_rows > 1 || s->pps->num_tile_columns > 1)) {
 754                 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
 755                 s->threads_number = 1;
 756             } else
 757                 s->enable_parallel_tiles = 0;
 758         } else
 759             s->enable_parallel_tiles = 0;
 760     }
 761
 762     if (s->pps->slice_header_extension_present_flag) {
 763         unsigned int length = get_ue_golomb_long(gb);
 764         if (length*8LL > get_bits_left(gb)) {
 765             av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
 766             return AVERROR_INVALIDDATA;
 767         }
 768         for (i = 0; i < length; i++)
 769             skip_bits(gb, 8);  // slice_header_extension_data_byte
 770     }
 771
 772     // Inferred parameters
 773     sh->slice_qp = 26U + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 774     if (sh->slice_qp > 51 ||
 775         sh->slice_qp < -s->sps->qp_bd_offset) {
 776         av_log(s->avctx, AV_LOG_ERROR,
 777                "The slice_qp %d is outside the valid range "
 778                "[%d, 51].\n",
 779                sh->slice_qp,
 780                -s->sps->qp_bd_offset);
 781         return AVERROR_INVALIDDATA;
 782     }
 783
 784     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 785
 786     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
 787         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
 788         return AVERROR_INVALIDDATA;
 789     }
 790
 791     if (get_bits_left(gb) < 0) {
 792         av_log(s->avctx, AV_LOG_ERROR,
 793                "Overread slice header by %d bits\n", -get_bits_left(gb));
 794         return AVERROR_INVALIDDATA;
 795     }
 796
 797     s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
 798
 799     if (!s->pps->cu_qp_delta_enabled_flag)
 800         s->HEVClc->qp_y = s->sh.slice_qp;
 801
 802     s->slice_initialized = 1;
 803     s->HEVClc->tu.cu_qp_offset_cb = 0;
 804     s->HEVClc->tu.cu_qp_offset_cr = 0;
 805
 806     return 0;
 807 }
 808
 809 #define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
 810
 811 #define SET_SAO(elem, value)                            \
 812 do {                                                    \
 813     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 814         sao->elem = value;                              \
 815     else if (sao_merge_left_flag)                       \
 816         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 817     else if (sao_merge_up_flag)                         \
 818         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 819     else                                                \
 820         sao->elem = 0;                                  \
 821 } while (0)
 822
 823 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 824 {
 825     HEVCLocalContext *lc    = s->HEVClc;
 826     int sao_merge_left_flag = 0;
 827     int sao_merge_up_flag   = 0;
 828     SAOParams *sao          = &CTB(s->sao, rx, ry);
 829     int c_idx, i;
 830
 831     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 832         s->sh.slice_sample_adaptive_offset_flag[1]) {
 833         if (rx > 0) {
 834             if (lc->ctb_left_flag)
 835                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 836         }
 837         if (ry > 0 && !sao_merge_left_flag) {
 838             if (lc->ctb_up_flag)
 839                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 840         }
 841     }
 842
 843     for (c_idx = 0; c_idx < (s->sps->chroma_format_idc ? 3 : 1); c_idx++) {
 844         int log2_sao_offset_scale = c_idx == 0 ? s->pps->log2_sao_offset_scale_luma :
 845                                                  s->pps->log2_sao_offset_scale_chroma;
 846
 847         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 848             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 849             continue;
 850         }
 851
 852         if (c_idx == 2) {
 853             sao->type_idx[2] = sao->type_idx[1];
 854             sao->eo_class[2] = sao->eo_class[1];
 855         } else {
 856             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 857         }
 858
 859         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 860             continue;
 861
 862         for (i = 0; i < 4; i++)
 863             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 864
 865         if (sao->type_idx[c_idx] == SAO_BAND) {
 866             for (i = 0; i < 4; i++) {
 867                 if (sao->offset_abs[c_idx][i]) {
 868                     SET_SAO(offset_sign[c_idx][i],
 869                             ff_hevc_sao_offset_sign_decode(s));
 870                 } else {
 871                     sao->offset_sign[c_idx][i] = 0;
 872                 }
 873             }
 874             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 875         } else if (c_idx != 2) {
 876             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 877         }
 878
 879         // Inferred parameters
 880         sao->offset_val[c_idx][0] = 0;
 881         for (i = 0; i < 4; i++) {
 882             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
 883             if (sao->type_idx[c_idx] == SAO_EDGE) {
 884                 if (i > 1)
 885                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 886             } else if (sao->offset_sign[c_idx][i]) {
 887                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 888             }
 889             sao->offset_val[c_idx][i + 1] <<= log2_sao_offset_scale;
 890         }
 891     }
 892 }
 893
 894 #undef SET_SAO
 895 #undef CTB
 896
 897 static int hls_cross_component_pred(HEVCContext *s, int idx) {
 898     HEVCLocalContext *lc    = s->HEVClc;
 899     int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
 900
 901     if (log2_res_scale_abs_plus1 !=  0) {
 902         int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
 903         lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
 904                                (1 - 2 * res_scale_sign_flag);
 905     } else {
 906         lc->tu.res_scale_val = 0;
 907     }
 908
 909
 910     return 0;
 911 }
 912
 913 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
 914                               int xBase, int yBase, int cb_xBase, int cb_yBase,
 915                               int log2_cb_size, int log2_trafo_size,
 916                               int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
 917 {
 918     HEVCLocalContext *lc = s->HEVClc;
 919     const int log2_trafo_size_c = log2_trafo_size - s->sps->hshift[1];
 920     int i;
 921
 922     if (lc->cu.pred_mode == MODE_INTRA) {
 923         int trafo_size = 1 << log2_trafo_size;
 924         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
 925
 926         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
 927     }
 928
 929     if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
 930         (s->sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
 931         int scan_idx   = SCAN_DIAG;
 932         int scan_idx_c = SCAN_DIAG;
 933         int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
 934                          (s->sps->chroma_format_idc == 2 &&
 935                          (cbf_cb[1] || cbf_cr[1]));
 936
 937         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
 938             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
 939             if (lc->tu.cu_qp_delta != 0)
 940                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
 941                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
 942             lc->tu.is_cu_qp_delta_coded = 1;
 943
 944             if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
 945                 lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
 946                 av_log(s->avctx, AV_LOG_ERROR,
 947                        "The cu_qp_delta %d is outside the valid range "
 948                        "[%d, %d].\n",
 949                        lc->tu.cu_qp_delta,
 950                        -(26 + s->sps->qp_bd_offset / 2),
 951                         (25 + s->sps->qp_bd_offset / 2));
 952                 return AVERROR_INVALIDDATA;
 953             }
 954
 955             ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
 956         }
 957
 958         if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
 959             !lc->cu.cu_transquant_bypass_flag  &&  !lc->tu.is_cu_chroma_qp_offset_coded) {
 960             int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
 961             if (cu_chroma_qp_offset_flag) {
 962                 int cu_chroma_qp_offset_idx  = 0;
 963                 if (s->pps->chroma_qp_offset_list_len_minus1 > 0) {
 964                     cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
 965                     av_log(s->avctx, AV_LOG_ERROR,
 966                         "cu_chroma_qp_offset_idx not yet tested.\n");
 967                 }
 968                 lc->tu.cu_qp_offset_cb = s->pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
 969                 lc->tu.cu_qp_offset_cr = s->pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
 970             } else {
 971                 lc->tu.cu_qp_offset_cb = 0;
 972                 lc->tu.cu_qp_offset_cr = 0;
 973             }
 974             lc->tu.is_cu_chroma_qp_offset_coded = 1;
 975         }
 976
 977         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
 978             if (lc->tu.intra_pred_mode >= 6 &&
 979                 lc->tu.intra_pred_mode <= 14) {
 980                 scan_idx = SCAN_VERT;
 981             } else if (lc->tu.intra_pred_mode >= 22 &&
 982                        lc->tu.intra_pred_mode <= 30) {
 983                 scan_idx = SCAN_HORIZ;
 984             }
 985
 986             if (lc->tu.intra_pred_mode_c >=  6 &&
 987                 lc->tu.intra_pred_mode_c <= 14) {
 988                 scan_idx_c = SCAN_VERT;
 989             } else if (lc->tu.intra_pred_mode_c >= 22 &&
 990                        lc->tu.intra_pred_mode_c <= 30) {
 991                 scan_idx_c = SCAN_HORIZ;
 992             }
 993         }
 994
 995         lc->tu.cross_pf = 0;
 996
 997         if (cbf_luma)
 998             ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
 999         if (s->sps->chroma_format_idc && (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3)) {
1000             int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
1001             int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
1002             lc->tu.cross_pf  = (s->pps->cross_component_prediction_enabled_flag && cbf_luma &&
1003                                 (lc->cu.pred_mode == MODE_INTER ||
1004                                  (lc->tu.chroma_mode_c ==  4)));
1005
1006             if (lc->tu.cross_pf) {
1007                 hls_cross_component_pred(s, 0);
1008             }
1009             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1010                 if (lc->cu.pred_mode == MODE_INTRA) {
1011                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1012                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
1013                 }
1014                 if (cbf_cb[i])
1015                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1016                                                 log2_trafo_size_c, scan_idx_c, 1);
1017                 else
1018                     if (lc->tu.cross_pf) {
1019                         ptrdiff_t stride = s->frame->linesize[1];
1020                         int hshift = s->sps->hshift[1];
1021                         int vshift = s->sps->vshift[1];
1022                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1023                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1024                         int size = 1 << log2_trafo_size_c;
1025
1026                         uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1027                                                               ((x0 >> hshift) << s->sps->pixel_shift)];
1028                         for (i = 0; i < (size * size); i++) {
1029                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1030                         }
1031                         s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1032                     }
1033             }
1034
1035             if (lc->tu.cross_pf) {
1036                 hls_cross_component_pred(s, 1);
1037             }
1038             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1039                 if (lc->cu.pred_mode == MODE_INTRA) {
1040                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1041                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1042                 }
1043                 if (cbf_cr[i])
1044                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1045                                                 log2_trafo_size_c, scan_idx_c, 2);
1046                 else
1047                     if (lc->tu.cross_pf) {
1048                         ptrdiff_t stride = s->frame->linesize[2];
1049                         int hshift = s->sps->hshift[2];
1050                         int vshift = s->sps->vshift[2];
1051                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1052                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1053                         int size = 1 << log2_trafo_size_c;
1054
1055                         uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1056                                                           ((x0 >> hshift) << s->sps->pixel_shift)];
1057                         for (i = 0; i < (size * size); i++) {
1058                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1059                         }
1060                         s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1061                     }
1062             }
1063         } else if (s->sps->chroma_format_idc && blk_idx == 3) {
1064             int trafo_size_h = 1 << (log2_trafo_size + 1);
1065             int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
1066             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1067                 if (lc->cu.pred_mode == MODE_INTRA) {
1068                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1069                                                     trafo_size_h, trafo_size_v);
1070                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1071                 }
1072                 if (cbf_cb[i])
1073                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1074                                                 log2_trafo_size, scan_idx_c, 1);
1075             }
1076             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1077                 if (lc->cu.pred_mode == MODE_INTRA) {
1078                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1079                                                 trafo_size_h, trafo_size_v);
1080                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1081                 }
1082                 if (cbf_cr[i])
1083                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1084                                                 log2_trafo_size, scan_idx_c, 2);
1085             }
1086         }
1087     } else if (s->sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
1088         if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
1089             int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
1090             int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
1091             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1092             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1093             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1094             if (s->sps->chroma_format_idc == 2) {
1095                 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1096                                                 trafo_size_h, trafo_size_v);
1097                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1098                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1099             }
1100         } else if (blk_idx == 3) {
1101             int trafo_size_h = 1 << (log2_trafo_size + 1);
1102             int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
1103             ff_hevc_set_neighbour_available(s, xBase, yBase,
1104                                             trafo_size_h, trafo_size_v);
1105             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1106             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1107             if (s->sps->chroma_format_idc == 2) {
1108                 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1109                                                 trafo_size_h, trafo_size_v);
1110                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1111                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
1112             }
1113         }
1114     }
1115
1116     return 0;
1117 }
1118
1119 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1120 {
1121     int cb_size          = 1 << log2_cb_size;
1122     int log2_min_pu_size = s->sps->log2_min_pu_size;
1123
1124     int min_pu_width     = s->sps->min_pu_width;
1125     int x_end = FFMIN(x0 + cb_size, s->sps->width);
1126     int y_end = FFMIN(y0 + cb_size, s->sps->height);
1127     int i, j;
1128
1129     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1130         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1131             s->is_pcm[i + j * min_pu_width] = 2;
1132 }
1133
1134 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1135                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1136                               int log2_cb_size, int log2_trafo_size,
1137                               int trafo_depth, int blk_idx,
1138                               const int *base_cbf_cb, const int *base_cbf_cr)
1139 {
1140     HEVCLocalContext *lc = s->HEVClc;
1141     uint8_t split_transform_flag;
1142     int cbf_cb[2];
1143     int cbf_cr[2];
1144     int ret;
1145
1146     cbf_cb[0] = base_cbf_cb[0];
1147     cbf_cb[1] = base_cbf_cb[1];
1148     cbf_cr[0] = base_cbf_cr[0];
1149     cbf_cr[1] = base_cbf_cr[1];
1150
1151     if (lc->cu.intra_split_flag) {
1152         if (trafo_depth == 1) {
1153             lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[blk_idx];
1154             if (s->sps->chroma_format_idc == 3) {
1155                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1156                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[blk_idx];
1157             } else {
1158                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1159                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1160             }
1161         }
1162     } else {
1163         lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[0];
1164         lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1165         lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1166     }
1167
1168     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1169         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1170         trafo_depth     < lc->cu.max_trafo_depth       &&
1171         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1172         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1173     } else {
1174         int inter_split = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1175                           lc->cu.pred_mode == MODE_INTER &&
1176                           lc->cu.part_mode != PART_2Nx2N &&
1177                           trafo_depth == 0;
1178
1179         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1180                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1181                                inter_split;
1182     }
1183
1184     if (s->sps->chroma_format_idc && (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3)) {
1185         if (trafo_depth == 0 || cbf_cb[0]) {
1186             cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1187             if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1188                 cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1189             }
1190         }
1191
1192         if (trafo_depth == 0 || cbf_cr[0]) {
1193             cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1194             if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1195                 cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1196             }
1197         }
1198     }
1199
1200     if (split_transform_flag) {
1201         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1202         const int x1 = x0 + trafo_size_split;
1203         const int y1 = y0 + trafo_size_split;
1204
1205 #define SUBDIVIDE(x, y, idx)                                                    \
1206 do {                                                                            \
1207     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1208                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1209                              cbf_cb, cbf_cr);                                   \
1210     if (ret < 0)                                                                \
1211         return ret;                                                             \
1212 } while (0)
1213
1214         SUBDIVIDE(x0, y0, 0);
1215         SUBDIVIDE(x1, y0, 1);
1216         SUBDIVIDE(x0, y1, 2);
1217         SUBDIVIDE(x1, y1, 3);
1218
1219 #undef SUBDIVIDE
1220     } else {
1221         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1222         int log2_min_tu_size = s->sps->log2_min_tb_size;
1223         int min_tu_width     = s->sps->min_tb_width;
1224         int cbf_luma         = 1;
1225
1226         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1227             cbf_cb[0] || cbf_cr[0] ||
1228             (s->sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1229             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1230         }
1231
1232         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1233                                  log2_cb_size, log2_trafo_size,
1234                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1235         if (ret < 0)
1236             return ret;
1237         // TODO: store cbf_luma somewhere else
1238         if (cbf_luma) {
1239             int i, j;
1240             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1241                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1242                     int x_tu = (x0 + j) >> log2_min_tu_size;
1243                     int y_tu = (y0 + i) >> log2_min_tu_size;
1244                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1245                 }
1246         }
1247         if (!s->sh.disable_deblocking_filter_flag) {
1248             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1249             if (s->pps->transquant_bypass_enable_flag &&
1250                 lc->cu.cu_transquant_bypass_flag)
1251                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1252         }
1253     }
1254     return 0;
1255 }
1256
1257 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1258 {
1259     HEVCLocalContext *lc = s->HEVClc;
1260     GetBitContext gb;
1261     int cb_size   = 1 << log2_cb_size;
1262     int stride0   = s->frame->linesize[0];
1263     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1264     int   stride1 = s->frame->linesize[1];
1265     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1266     int   stride2 = s->frame->linesize[2];
1267     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1268
1269     int length         = cb_size * cb_size * s->sps->pcm.bit_depth +
1270                          (((cb_size >> s->sps->hshift[1]) * (cb_size >> s->sps->vshift[1])) +
1271                           ((cb_size >> s->sps->hshift[2]) * (cb_size >> s->sps->vshift[2]))) *
1272                           s->sps->pcm.bit_depth_chroma;
1273     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1274     int ret;
1275
1276     if (!s->sh.disable_deblocking_filter_flag)
1277         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1278
1279     ret = init_get_bits(&gb, pcm, length);
1280     if (ret < 0)
1281         return ret;
1282
1283     s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size,     &gb, s->sps->pcm.bit_depth);
1284     if (s->sps->chroma_format_idc) {
1285         s->hevcdsp.put_pcm(dst1, stride1,
1286                            cb_size >> s->sps->hshift[1],
1287                            cb_size >> s->sps->vshift[1],
1288                            &gb, s->sps->pcm.bit_depth_chroma);
1289         s->hevcdsp.put_pcm(dst2, stride2,
1290                            cb_size >> s->sps->hshift[2],
1291                            cb_size >> s->sps->vshift[2],
1292                            &gb, s->sps->pcm.bit_depth_chroma);
1293     }
1294
1295     return 0;
1296 }
1297
1298 /**
1299  * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1300  *
1301  * @param s HEVC decoding context
1302  * @param dst target buffer for block data at block position
1303  * @param dststride stride of the dst buffer
1304  * @param ref reference picture buffer at origin (0, 0)
1305  * @param mv motion vector (relative to block position) to get pixel data from
1306  * @param x_off horizontal position of block from origin (0, 0)
1307  * @param y_off vertical position of block from origin (0, 0)
1308  * @param block_w width of block
1309  * @param block_h height of block
1310  * @param luma_weight weighting factor applied to the luma prediction
1311  * @param luma_offset additive offset applied to the luma prediction value
1312  */
1313
1314 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1315                         AVFrame *ref, const Mv *mv, int x_off, int y_off,
1316                         int block_w, int block_h, int luma_weight, int luma_offset)
1317 {
1318     HEVCLocalContext *lc = s->HEVClc;
1319     uint8_t *src         = ref->data[0];
1320     ptrdiff_t srcstride  = ref->linesize[0];
1321     int pic_width        = s->sps->width;
1322     int pic_height       = s->sps->height;
1323     int mx               = mv->x & 3;
1324     int my               = mv->y & 3;
1325     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1326                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1327     int idx              = ff_hevc_pel_weight[block_w];
1328
1329     x_off += mv->x >> 2;
1330     y_off += mv->y >> 2;
1331     src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
1332
1333     if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1334         x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1335         y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1336         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1337         int offset     = QPEL_EXTRA_BEFORE * srcstride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1338         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1339
1340         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1341                                  edge_emu_stride, srcstride,
1342                                  block_w + QPEL_EXTRA,
1343                                  block_h + QPEL_EXTRA,
1344                                  x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1345                                  pic_width, pic_height);
1346         src = lc->edge_emu_buffer + buf_offset;
1347         srcstride = edge_emu_stride;
1348     }
1349
1350     if (!weight_flag)
1351         s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1352                                                       block_h, mx, my, block_w);
1353     else
1354         s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1355                                                         block_h, s->sh.luma_log2_weight_denom,
1356                                                         luma_weight, luma_offset, mx, my, block_w);
1357 }
1358
1359 /**
1360  * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1361  *
1362  * @param s HEVC decoding context
1363  * @param dst target buffer for block data at block position
1364  * @param dststride stride of the dst buffer
1365  * @param ref0 reference picture0 buffer at origin (0, 0)
1366  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1367  * @param x_off horizontal position of block from origin (0, 0)
1368  * @param y_off vertical position of block from origin (0, 0)
1369  * @param block_w width of block
1370  * @param block_h height of block
1371  * @param ref1 reference picture1 buffer at origin (0, 0)
1372  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1373  * @param current_mv current motion vector structure
1374  */
1375  static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1376                        AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1377                        int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1378 {
1379     HEVCLocalContext *lc = s->HEVClc;
1380     ptrdiff_t src0stride  = ref0->linesize[0];
1381     ptrdiff_t src1stride  = ref1->linesize[0];
1382     int pic_width        = s->sps->width;
1383     int pic_height       = s->sps->height;
1384     int mx0              = mv0->x & 3;
1385     int my0              = mv0->y & 3;
1386     int mx1              = mv1->x & 3;
1387     int my1              = mv1->y & 3;
1388     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1389                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1390     int x_off0           = x_off + (mv0->x >> 2);
1391     int y_off0           = y_off + (mv0->y >> 2);
1392     int x_off1           = x_off + (mv1->x >> 2);
1393     int y_off1           = y_off + (mv1->y >> 2);
1394     int idx              = ff_hevc_pel_weight[block_w];
1395
1396     uint8_t *src0  = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
1397     uint8_t *src1  = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
1398
1399     if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1400         x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1401         y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1402         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1403         int offset     = QPEL_EXTRA_BEFORE * src0stride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1404         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1405
1406         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1407                                  edge_emu_stride, src0stride,
1408                                  block_w + QPEL_EXTRA,
1409                                  block_h + QPEL_EXTRA,
1410                                  x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1411                                  pic_width, pic_height);
1412         src0 = lc->edge_emu_buffer + buf_offset;
1413         src0stride = edge_emu_stride;
1414     }
1415
1416     if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1417         x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1418         y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1419         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1420         int offset     = QPEL_EXTRA_BEFORE * src1stride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1421         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1422
1423         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1424                                  edge_emu_stride, src1stride,
1425                                  block_w + QPEL_EXTRA,
1426                                  block_h + QPEL_EXTRA,
1427                                  x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1428                                  pic_width, pic_height);
1429         src1 = lc->edge_emu_buffer2 + buf_offset;
1430         src1stride = edge_emu_stride;
1431     }
1432
1433     s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
1434                                                 block_h, mx0, my0, block_w);
1435     if (!weight_flag)
1436         s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1437                                                        block_h, mx1, my1, block_w);
1438     else
1439         s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1440                                                          block_h, s->sh.luma_log2_weight_denom,
1441                                                          s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1442                                                          s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1443                                                          s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1444                                                          s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1445                                                          mx1, my1, block_w);
1446
1447 }
1448
1449 /**
1450  * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1451  *
1452  * @param s HEVC decoding context
1453  * @param dst1 target buffer for block data at block position (U plane)
1454  * @param dst2 target buffer for block data at block position (V plane)
1455  * @param dststride stride of the dst1 and dst2 buffers
1456  * @param ref reference picture buffer at origin (0, 0)
1457  * @param mv motion vector (relative to block position) to get pixel data from
1458  * @param x_off horizontal position of block from origin (0, 0)
1459  * @param y_off vertical position of block from origin (0, 0)
1460  * @param block_w width of block
1461  * @param block_h height of block
1462  * @param chroma_weight weighting factor applied to the chroma prediction
1463  * @param chroma_offset additive offset applied to the chroma prediction value
1464  */
1465
1466 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1467                           ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1468                           int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1469 {
1470     HEVCLocalContext *lc = s->HEVClc;
1471     int pic_width        = s->sps->width >> s->sps->hshift[1];
1472     int pic_height       = s->sps->height >> s->sps->vshift[1];
1473     const Mv *mv         = &current_mv->mv[reflist];
1474     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1475                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1476     int idx              = ff_hevc_pel_weight[block_w];
1477     int hshift           = s->sps->hshift[1];
1478     int vshift           = s->sps->vshift[1];
1479     intptr_t mx          = mv->x & ((1 << (2 + hshift)) - 1);
1480     intptr_t my          = mv->y & ((1 << (2 + vshift)) - 1);
1481     intptr_t _mx         = mx << (1 - hshift);
1482     intptr_t _my         = my << (1 - vshift);
1483
1484     x_off += mv->x >> (2 + hshift);
1485     y_off += mv->y >> (2 + vshift);
1486     src0  += y_off * srcstride + (x_off << s->sps->pixel_shift);
1487
1488     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1489         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1490         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1491         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1492         int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->sps->pixel_shift));
1493         int buf_offset0 = EPEL_EXTRA_BEFORE *
1494                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1495         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1496                                  edge_emu_stride, srcstride,
1497                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1498                                  x_off - EPEL_EXTRA_BEFORE,
1499                                  y_off - EPEL_EXTRA_BEFORE,
1500                                  pic_width, pic_height);
1501
1502         src0 = lc->edge_emu_buffer + buf_offset0;
1503         srcstride = edge_emu_stride;
1504     }
1505     if (!weight_flag)
1506         s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1507                                                   block_h, _mx, _my, block_w);
1508     else
1509         s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1510                                                         block_h, s->sh.chroma_log2_weight_denom,
1511                                                         chroma_weight, chroma_offset, _mx, _my, block_w);
1512 }
1513
1514 /**
1515  * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1516  *
1517  * @param s HEVC decoding context
1518  * @param dst target buffer for block data at block position
1519  * @param dststride stride of the dst buffer
1520  * @param ref0 reference picture0 buffer at origin (0, 0)
1521  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1522  * @param x_off horizontal position of block from origin (0, 0)
1523  * @param y_off vertical position of block from origin (0, 0)
1524  * @param block_w width of block
1525  * @param block_h height of block
1526  * @param ref1 reference picture1 buffer at origin (0, 0)
1527  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1528  * @param current_mv current motion vector structure
1529  * @param cidx chroma component(cb, cr)
1530  */
1531 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1532                          int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1533 {
1534     HEVCLocalContext *lc = s->HEVClc;
1535     uint8_t *src1        = ref0->data[cidx+1];
1536     uint8_t *src2        = ref1->data[cidx+1];
1537     ptrdiff_t src1stride = ref0->linesize[cidx+1];
1538     ptrdiff_t src2stride = ref1->linesize[cidx+1];
1539     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1540                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1541     int pic_width        = s->sps->width >> s->sps->hshift[1];
1542     int pic_height       = s->sps->height >> s->sps->vshift[1];
1543     Mv *mv0              = &current_mv->mv[0];
1544     Mv *mv1              = &current_mv->mv[1];
1545     int hshift = s->sps->hshift[1];
1546     int vshift = s->sps->vshift[1];
1547
1548     intptr_t mx0 = mv0->x & ((1 << (2 + hshift)) - 1);
1549     intptr_t my0 = mv0->y & ((1 << (2 + vshift)) - 1);
1550     intptr_t mx1 = mv1->x & ((1 << (2 + hshift)) - 1);
1551     intptr_t my1 = mv1->y & ((1 << (2 + vshift)) - 1);
1552     intptr_t _mx0 = mx0 << (1 - hshift);
1553     intptr_t _my0 = my0 << (1 - vshift);
1554     intptr_t _mx1 = mx1 << (1 - hshift);
1555     intptr_t _my1 = my1 << (1 - vshift);
1556
1557     int x_off0 = x_off + (mv0->x >> (2 + hshift));
1558     int y_off0 = y_off + (mv0->y >> (2 + vshift));
1559     int x_off1 = x_off + (mv1->x >> (2 + hshift));
1560     int y_off1 = y_off + (mv1->y >> (2 + vshift));
1561     int idx = ff_hevc_pel_weight[block_w];
1562     src1  += y_off0 * src1stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
1563     src2  += y_off1 * src2stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
1564
1565     if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1566         x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1567         y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1568         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1569         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1570         int buf_offset1 = EPEL_EXTRA_BEFORE *
1571                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1572
1573         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1574                                  edge_emu_stride, src1stride,
1575                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1576                                  x_off0 - EPEL_EXTRA_BEFORE,
1577                                  y_off0 - EPEL_EXTRA_BEFORE,
1578                                  pic_width, pic_height);
1579
1580         src1 = lc->edge_emu_buffer + buf_offset1;
1581         src1stride = edge_emu_stride;
1582     }
1583
1584     if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1585         x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1586         y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1587         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1588         int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1589         int buf_offset1 = EPEL_EXTRA_BEFORE *
1590                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1591
1592         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1593                                  edge_emu_stride, src2stride,
1594                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1595                                  x_off1 - EPEL_EXTRA_BEFORE,
1596                                  y_off1 - EPEL_EXTRA_BEFORE,
1597                                  pic_width, pic_height);
1598
1599         src2 = lc->edge_emu_buffer2 + buf_offset1;
1600         src2stride = edge_emu_stride;
1601     }
1602
1603     s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1604                                                 block_h, _mx0, _my0, block_w);
1605     if (!weight_flag)
1606         s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1607                                                        src2, src2stride, lc->tmp,
1608                                                        block_h, _mx1, _my1, block_w);
1609     else
1610         s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1611                                                          src2, src2stride, lc->tmp,
1612                                                          block_h,
1613                                                          s->sh.chroma_log2_weight_denom,
1614                                                          s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1615                                                          s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1616                                                          s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1617                                                          s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1618                                                          _mx1, _my1, block_w);
1619 }
1620
1621 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1622                                 const Mv *mv, int y0, int height)
1623 {
1624     int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1625
1626     if (s->threads_type == FF_THREAD_FRAME )
1627         ff_thread_await_progress(&ref->tf, y, 0);
1628 }
1629
1630 static void hevc_luma_mv_mpv_mode(HEVCContext *s, int x0, int y0, int nPbW,
1631                                   int nPbH, int log2_cb_size, int part_idx,
1632                                   int merge_idx, MvField *mv)
1633 {
1634     HEVCLocalContext *lc = s->HEVClc;
1635     enum InterPredIdc inter_pred_idc = PRED_L0;
1636     int mvp_flag;
1637
1638     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1639     mv->pred_flag = 0;
1640     if (s->sh.slice_type == B_SLICE)
1641         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1642
1643     if (inter_pred_idc != PRED_L1) {
1644         if (s->sh.nb_refs[L0])
1645             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1646
1647         mv->pred_flag = PF_L0;
1648         ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1649         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1650         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1651                                  part_idx, merge_idx, mv, mvp_flag, 0);
1652         mv->mv[0].x += lc->pu.mvd.x;
1653         mv->mv[0].y += lc->pu.mvd.y;
1654     }
1655
1656     if (inter_pred_idc != PRED_L0) {
1657         if (s->sh.nb_refs[L1])
1658             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1659
1660         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1661             AV_ZERO32(&lc->pu.mvd);
1662         } else {
1663             ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1664         }
1665
1666         mv->pred_flag += PF_L1;
1667         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1668         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1669                                  part_idx, merge_idx, mv, mvp_flag, 1);
1670         mv->mv[1].x += lc->pu.mvd.x;
1671         mv->mv[1].y += lc->pu.mvd.y;
1672     }
1673 }
1674
1675 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1676                                 int nPbW, int nPbH,
1677                                 int log2_cb_size, int partIdx, int idx)
1678 {
1679 #define POS(c_idx, x, y)                                                              \
1680     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1681                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1682     HEVCLocalContext *lc = s->HEVClc;
1683     int merge_idx = 0;
1684     struct MvField current_mv = {{{ 0 }}};
1685
1686     int min_pu_width = s->sps->min_pu_width;
1687
1688     MvField *tab_mvf = s->ref->tab_mvf;
1689     RefPicList  *refPicList = s->ref->refPicList;
1690     HEVCFrame *ref0 = NULL, *ref1 = NULL;
1691     uint8_t *dst0 = POS(0, x0, y0);
1692     uint8_t *dst1 = POS(1, x0, y0);
1693     uint8_t *dst2 = POS(2, x0, y0);
1694     int log2_min_cb_size = s->sps->log2_min_cb_size;
1695     int min_cb_width     = s->sps->min_cb_width;
1696     int x_cb             = x0 >> log2_min_cb_size;
1697     int y_cb             = y0 >> log2_min_cb_size;
1698     int x_pu, y_pu;
1699     int i, j;
1700
1701     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1702
1703     if (!skip_flag)
1704         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1705
1706     if (skip_flag || lc->pu.merge_flag) {
1707         if (s->sh.max_num_merge_cand > 1)
1708             merge_idx = ff_hevc_merge_idx_decode(s);
1709         else
1710             merge_idx = 0;
1711
1712         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1713                                    partIdx, merge_idx, &current_mv);
1714     } else {
1715         hevc_luma_mv_mpv_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1716                               partIdx, merge_idx, &current_mv);
1717     }
1718
1719     x_pu = x0 >> s->sps->log2_min_pu_size;
1720     y_pu = y0 >> s->sps->log2_min_pu_size;
1721
1722     for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1723         for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1724             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1725
1726     if (current_mv.pred_flag & PF_L0) {
1727         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1728         if (!ref0)
1729             return;
1730         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1731     }
1732     if (current_mv.pred_flag & PF_L1) {
1733         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1734         if (!ref1)
1735             return;
1736         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1737     }
1738
1739     if (current_mv.pred_flag == PF_L0) {
1740         int x0_c = x0 >> s->sps->hshift[1];
1741         int y0_c = y0 >> s->sps->vshift[1];
1742         int nPbW_c = nPbW >> s->sps->hshift[1];
1743         int nPbH_c = nPbH >> s->sps->vshift[1];
1744
1745         luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1746                     &current_mv.mv[0], x0, y0, nPbW, nPbH,
1747                     s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1748                     s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1749
1750         if (s->sps->chroma_format_idc) {
1751             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1752                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1753                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1754             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1755                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1756                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1757         }
1758     } else if (current_mv.pred_flag == PF_L1) {
1759         int x0_c = x0 >> s->sps->hshift[1];
1760         int y0_c = y0 >> s->sps->vshift[1];
1761         int nPbW_c = nPbW >> s->sps->hshift[1];
1762         int nPbH_c = nPbH >> s->sps->vshift[1];
1763
1764         luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1765                     &current_mv.mv[1], x0, y0, nPbW, nPbH,
1766                     s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1767                     s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1768
1769         if (s->sps->chroma_format_idc) {
1770             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1771                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1772                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1773
1774             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1775                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1776                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1777         }
1778     } else if (current_mv.pred_flag == PF_BI) {
1779         int x0_c = x0 >> s->sps->hshift[1];
1780         int y0_c = y0 >> s->sps->vshift[1];
1781         int nPbW_c = nPbW >> s->sps->hshift[1];
1782         int nPbH_c = nPbH >> s->sps->vshift[1];
1783
1784         luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1785                    &current_mv.mv[0], x0, y0, nPbW, nPbH,
1786                    ref1->frame, &current_mv.mv[1], &current_mv);
1787
1788         if (s->sps->chroma_format_idc) {
1789             chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1790                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0);
1791
1792             chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1793                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 1);
1794         }
1795     }
1796 }
1797
1798 /**
1799  * 8.4.1
1800  */
1801 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1802                                 int prev_intra_luma_pred_flag)
1803 {
1804     HEVCLocalContext *lc = s->HEVClc;
1805     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1806     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1807     int min_pu_width     = s->sps->min_pu_width;
1808     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1809     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1810     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1811
1812     int cand_up   = (lc->ctb_up_flag || y0b) ?
1813                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1814     int cand_left = (lc->ctb_left_flag || x0b) ?
1815                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1816
1817     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1818
1819     MvField *tab_mvf = s->ref->tab_mvf;
1820     int intra_pred_mode;
1821     int candidate[3];
1822     int i, j;
1823
1824     // intra_pred_mode prediction does not cross vertical CTB boundaries
1825     if ((y0 - 1) < y_ctb)
1826         cand_up = INTRA_DC;
1827
1828     if (cand_left == cand_up) {
1829         if (cand_left < 2) {
1830             candidate[0] = INTRA_PLANAR;
1831             candidate[1] = INTRA_DC;
1832             candidate[2] = INTRA_ANGULAR_26;
1833         } else {
1834             candidate[0] = cand_left;
1835             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1836             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1837         }
1838     } else {
1839         candidate[0] = cand_left;
1840         candidate[1] = cand_up;
1841         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1842             candidate[2] = INTRA_PLANAR;
1843         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1844             candidate[2] = INTRA_DC;
1845         } else {
1846             candidate[2] = INTRA_ANGULAR_26;
1847         }
1848     }
1849
1850     if (prev_intra_luma_pred_flag) {
1851         intra_pred_mode = candidate[lc->pu.mpm_idx];
1852     } else {
1853         if (candidate[0] > candidate[1])
1854             FFSWAP(uint8_t, candidate[0], candidate[1]);
1855         if (candidate[0] > candidate[2])
1856             FFSWAP(uint8_t, candidate[0], candidate[2]);
1857         if (candidate[1] > candidate[2])
1858             FFSWAP(uint8_t, candidate[1], candidate[2]);
1859
1860         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1861         for (i = 0; i < 3; i++)
1862             if (intra_pred_mode >= candidate[i])
1863                 intra_pred_mode++;
1864     }
1865
1866     /* write the intra prediction units into the mv array */
1867     if (!size_in_pus)
1868         size_in_pus = 1;
1869     for (i = 0; i < size_in_pus; i++) {
1870         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1871                intra_pred_mode, size_in_pus);
1872
1873         for (j = 0; j < size_in_pus; j++) {
1874             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
1875         }
1876     }
1877
1878     return intra_pred_mode;
1879 }
1880
1881 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1882                                           int log2_cb_size, int ct_depth)
1883 {
1884     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1885     int x_cb   = x0 >> s->sps->log2_min_cb_size;
1886     int y_cb   = y0 >> s->sps->log2_min_cb_size;
1887     int y;
1888
1889     for (y = 0; y < length; y++)
1890         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
1891                ct_depth, length);
1892 }
1893
/*
 * Intra prediction mode remapping table, indexed by a mode in 0..34.
 * intra_prediction_unit() applies it to the chroma prediction mode when
 * chroma_format_idc is 2.
 *
 * NOTE(review): the entries for modes 11 and 14 (12 and 17) should be
 * checked against the 4:2:2 mode mapping table of the final H.265 text --
 * TODO confirm.
 */
static const uint8_t tab_mode_idx[] = {
    /*  0 ..  6 */  0,  1,  2,  2,  2,  2,  3,
    /*  7 .. 13 */  5,  7,  8, 10, 12, 13, 15,
    /* 14 .. 20 */ 17, 18, 19, 20, 21, 22, 23,
    /* 21 .. 27 */ 23, 24, 24, 25, 25, 26, 27,
    /* 28 .. 34 */ 27, 28, 28, 29, 29, 30, 31,
};
1897
1898 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1899                                   int log2_cb_size)
1900 {
1901     HEVCLocalContext *lc = s->HEVClc;
1902     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1903     uint8_t prev_intra_luma_pred_flag[4];
1904     int split   = lc->cu.part_mode == PART_NxN;
1905     int pb_size = (1 << log2_cb_size) >> split;
1906     int side    = split + 1;
1907     int chroma_mode;
1908     int i, j;
1909
1910     for (i = 0; i < side; i++)
1911         for (j = 0; j < side; j++)
1912             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1913
1914     for (i = 0; i < side; i++) {
1915         for (j = 0; j < side; j++) {
1916             if (prev_intra_luma_pred_flag[2 * i + j])
1917                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1918             else
1919                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1920
1921             lc->pu.intra_pred_mode[2 * i + j] =
1922                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1923                                      prev_intra_luma_pred_flag[2 * i + j]);
1924         }
1925     }
1926
1927     if (s->sps->chroma_format_idc == 3) {
1928         for (i = 0; i < side; i++) {
1929             for (j = 0; j < side; j++) {
1930                 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1931                 if (chroma_mode != 4) {
1932                     if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
1933                         lc->pu.intra_pred_mode_c[2 * i + j] = 34;
1934                     else
1935                         lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
1936                 } else {
1937                     lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
1938                 }
1939             }
1940         }
1941     } else if (s->sps->chroma_format_idc == 2) {
1942         int mode_idx;
1943         lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1944         if (chroma_mode != 4) {
1945             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1946                 mode_idx = 34;
1947             else
1948                 mode_idx = intra_chroma_table[chroma_mode];
1949         } else {
1950             mode_idx = lc->pu.intra_pred_mode[0];
1951         }
1952         lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
1953     } else if (s->sps->chroma_format_idc != 0) {
1954         chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1955         if (chroma_mode != 4) {
1956             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1957                 lc->pu.intra_pred_mode_c[0] = 34;
1958             else
1959                 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
1960         } else {
1961             lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
1962         }
1963     }
1964 }
1965
1966 static void intra_prediction_unit_default_value(HEVCContext *s,
1967                                                 int x0, int y0,
1968                                                 int log2_cb_size)
1969 {
1970     HEVCLocalContext *lc = s->HEVClc;
1971     int pb_size          = 1 << log2_cb_size;
1972     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
1973     int min_pu_width     = s->sps->min_pu_width;
1974     MvField *tab_mvf     = s->ref->tab_mvf;
1975     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1976     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1977     int j, k;
1978
1979     if (size_in_pus == 0)
1980         size_in_pus = 1;
1981     for (j = 0; j < size_in_pus; j++)
1982         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
1983     if (lc->cu.pred_mode == MODE_INTRA)
1984         for (j = 0; j < size_in_pus; j++)
1985             for (k = 0; k < size_in_pus; k++)
1986                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
1987 }
1988
1989 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
1990 {
1991     int cb_size          = 1 << log2_cb_size;
1992     HEVCLocalContext *lc = s->HEVClc;
1993     int log2_min_cb_size = s->sps->log2_min_cb_size;
1994     int length           = cb_size >> log2_min_cb_size;
1995     int min_cb_width     = s->sps->min_cb_width;
1996     int x_cb             = x0 >> log2_min_cb_size;
1997     int y_cb             = y0 >> log2_min_cb_size;
1998     int idx              = log2_cb_size - 2;
1999     int qp_block_mask    = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
2000     int x, y, ret;
2001
2002     lc->cu.x                = x0;
2003     lc->cu.y                = y0;
2004     lc->cu.pred_mode        = MODE_INTRA;
2005     lc->cu.part_mode        = PART_2Nx2N;
2006     lc->cu.intra_split_flag = 0;
2007
2008     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2009     for (x = 0; x < 4; x++)
2010         lc->pu.intra_pred_mode[x] = 1;
2011     if (s->pps->transquant_bypass_enable_flag) {
2012         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2013         if (lc->cu.cu_transquant_bypass_flag)
2014             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2015     } else
2016         lc->cu.cu_transquant_bypass_flag = 0;
2017
2018     if (s->sh.slice_type != I_SLICE) {
2019         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2020
2021         x = y_cb * min_cb_width + x_cb;
2022         for (y = 0; y < length; y++) {
2023             memset(&s->skip_flag[x], skip_flag, length);
2024             x += min_cb_width;
2025         }
2026         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2027     } else {
2028         x = y_cb * min_cb_width + x_cb;
2029         for (y = 0; y < length; y++) {
2030             memset(&s->skip_flag[x], 0, length);
2031             x += min_cb_width;
2032         }
2033     }
2034
2035     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2036         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2037         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2038
2039         if (!s->sh.disable_deblocking_filter_flag)
2040             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2041     } else {
2042         int pcm_flag = 0;
2043
2044         if (s->sh.slice_type != I_SLICE)
2045             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2046         if (lc->cu.pred_mode != MODE_INTRA ||
2047             log2_cb_size == s->sps->log2_min_cb_size) {
2048             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2049             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2050                                       lc->cu.pred_mode == MODE_INTRA;
2051         }
2052
2053         if (lc->cu.pred_mode == MODE_INTRA) {
2054             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2055                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2056                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2057                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2058             }
2059             if (pcm_flag) {
2060                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2061                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2062                 if (s->sps->pcm.loop_filter_disable_flag)
2063                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2064
2065                 if (ret < 0)
2066                     return ret;
2067             } else {
2068                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2069             }
2070         } else {
2071             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2072             switch (lc->cu.part_mode) {
2073             case PART_2Nx2N:
2074                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2075                 break;
2076             case PART_2NxN:
2077                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0, idx);
2078                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2079                 break;
2080             case PART_Nx2N:
2081                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2082                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2083                 break;
2084             case PART_2NxnU:
2085                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0, idx);
2086                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2087                 break;
2088             case PART_2NxnD:
2089                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2090                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1, idx);
2091                 break;
2092             case PART_nLx2N:
2093                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0, idx - 2);
2094                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2095                 break;
2096             case PART_nRx2N:
2097                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2098                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1, idx - 2);
2099                 break;
2100             case PART_NxN:
2101                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2102                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2103                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2104                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
2105                 break;
2106             }
2107         }
2108
2109         if (!pcm_flag) {
2110             int rqt_root_cbf = 1;
2111
2112             if (lc->cu.pred_mode != MODE_INTRA &&
2113                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2114                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2115             }
2116             if (rqt_root_cbf) {
2117                 const static int cbf[2] = { 0 };
2118                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2119                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2120                                          s->sps->max_transform_hierarchy_depth_inter;
2121                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2122                                          log2_cb_size,
2123                                          log2_cb_size, 0, 0, cbf, cbf);
2124                 if (ret < 0)
2125                     return ret;
2126             } else {
2127                 if (!s->sh.disable_deblocking_filter_flag)
2128                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2129             }
2130         }
2131     }
2132
2133     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2134         ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
2135
2136     x = y_cb * min_cb_width + x_cb;
2137     for (y = 0; y < length; y++) {
2138         memset(&s->qp_y_tab[x], lc->qp_y, length);
2139         x += min_cb_width;
2140     }
2141
2142     if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2143        ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2144         lc->qPy_pred = lc->qp_y;
2145     }
2146
2147     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
2148
2149     return 0;
2150 }
2151
2152 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2153                                int log2_cb_size, int cb_depth)
2154 {
2155     HEVCLocalContext *lc = s->HEVClc;
2156     const int cb_size    = 1 << log2_cb_size;
2157     int ret;
2158     int split_cu;
2159
2160     lc->ct_depth = cb_depth;
2161     if (x0 + cb_size <= s->sps->width  &&
2162         y0 + cb_size <= s->sps->height &&
2163         log2_cb_size > s->sps->log2_min_cb_size) {
2164         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2165     } else {
2166         split_cu = (log2_cb_size > s->sps->log2_min_cb_size);
2167     }
2168     if (s->pps->cu_qp_delta_enabled_flag &&
2169         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2170         lc->tu.is_cu_qp_delta_coded = 0;
2171         lc->tu.cu_qp_delta          = 0;
2172     }
2173
2174     if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2175         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_chroma_qp_offset_depth) {
2176         lc->tu.is_cu_chroma_qp_offset_coded = 0;
2177     }
2178
2179     if (split_cu) {
2180         int qp_block_mask = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
2181         const int cb_size_split = cb_size >> 1;
2182         const int x1 = x0 + cb_size_split;
2183         const int y1 = y0 + cb_size_split;
2184
2185         int more_data = 0;
2186
2187         more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2188         if (more_data < 0)
2189             return more_data;
2190
2191         if (more_data && x1 < s->sps->width) {
2192             more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2193             if (more_data < 0)
2194                 return more_data;
2195         }
2196         if (more_data && y1 < s->sps->height) {
2197             more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2198             if (more_data < 0)
2199                 return more_data;
2200         }
2201         if (more_data && x1 < s->sps->width &&
2202             y1 < s->sps->height) {
2203             more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
2204             if (more_data < 0)
2205                 return more_data;
2206         }
2207
2208         if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2209             ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2210             lc->qPy_pred = lc->qp_y;
2211
2212         if (more_data)
2213             return ((x1 + cb_size_split) < s->sps->width ||
2214                     (y1 + cb_size_split) < s->sps->height);
2215         else
2216             return 0;
2217     } else {
2218         ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2219         if (ret < 0)
2220             return ret;
2221         if ((!((x0 + cb_size) %
2222                (1 << (s->sps->log2_ctb_size))) ||
2223              (x0 + cb_size >= s->sps->width)) &&
2224             (!((y0 + cb_size) %
2225                (1 << (s->sps->log2_ctb_size))) ||
2226              (y0 + cb_size >= s->sps->height))) {
2227             int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2228             return !end_of_slice_flag;
2229         } else {
2230             return 1;
2231         }
2232     }
2233
2234     return 0;
2235 }
2236
2237 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2238                                  int ctb_addr_ts)
2239 {
2240     HEVCLocalContext *lc  = s->HEVClc;
2241     int ctb_size          = 1 << s->sps->log2_ctb_size;
2242     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2243     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2244
2245     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2246
2247     if (s->pps->entropy_coding_sync_enabled_flag) {
2248         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2249             lc->first_qp_group = 1;
2250         lc->end_of_tiles_x = s->sps->width;
2251     } else if (s->pps->tiles_enabled_flag) {
2252         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2253             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2254             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2255             lc->first_qp_group   = 1;
2256         }
2257     } else {
2258         lc->end_of_tiles_x = s->sps->width;
2259     }
2260
2261     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2262
2263     lc->boundary_flags = 0;
2264     if (s->pps->tiles_enabled_flag) {
2265         if (x_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2266             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2267         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2268             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2269         if (y_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]])
2270             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2271         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width])
2272             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2273     } else {
2274         if (!ctb_addr_in_slice > 0)
2275             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2276         if (ctb_addr_in_slice < s->sps->ctb_width)
2277             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2278     }
2279
2280     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2281     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2282     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2283     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2284 }
2285
2286 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2287 {
2288     HEVCContext *s  = avctxt->priv_data;
2289     int ctb_size    = 1 << s->sps->log2_ctb_size;
2290     int more_data   = 1;
2291     int x_ctb       = 0;
2292     int y_ctb       = 0;
2293     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2294
2295     if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2296         av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2297         return AVERROR_INVALIDDATA;
2298     }
2299
2300     if (s->sh.dependent_slice_segment_flag) {
2301         int prev_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2302         if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2303             av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2304             return AVERROR_INVALIDDATA;
2305         }
2306     }
2307
2308     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2309         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2310
2311         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2312         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2313         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2314
2315         ff_hevc_cabac_init(s, ctb_addr_ts);
2316
2317         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2318
2319         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2320         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2321         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2322
2323         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2324         if (more_data < 0) {
2325             s->tab_slice_address[ctb_addr_rs] = -1;
2326             return more_data;
2327         }
2328
2329
2330         ctb_addr_ts++;
2331         ff_hevc_save_states(s, ctb_addr_ts);
2332         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2333     }
2334
2335     if (x_ctb + ctb_size >= s->sps->width &&
2336         y_ctb + ctb_size >= s->sps->height)
2337         ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2338
2339     return ctb_addr_ts;
2340 }
2341
2342 static int hls_slice_data(HEVCContext *s)
2343 {
2344     int arg[2];
2345     int ret[2];
2346
2347     arg[0] = 0;
2348     arg[1] = 1;
2349
2350     s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
2351     return ret[0];
2352 }
2353 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2354 {
2355     HEVCContext *s1  = avctxt->priv_data, *s;
2356     HEVCLocalContext *lc;
2357     int ctb_size    = 1<< s1->sps->log2_ctb_size;
2358     int more_data   = 1;
2359     int *ctb_row_p    = input_ctb_row;
2360     int ctb_row = ctb_row_p[job];
2361     int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->sps->width + ctb_size - 1) >> s1->sps->log2_ctb_size);
2362     int ctb_addr_ts = s1->pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2363     int thread = ctb_row % s1->threads_number;
2364     int ret;
2365
2366     s = s1->sList[self_id];
2367     lc = s->HEVClc;
2368
2369     if(ctb_row) {
2370         ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2371
2372         if (ret < 0)
2373             return ret;
2374         ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2375     }
2376
2377     while(more_data && ctb_addr_ts < s->sps->ctb_size) {
2378         int x_ctb = (ctb_addr_rs % s->sps->ctb_width) << s->sps->log2_ctb_size;
2379         int y_ctb = (ctb_addr_rs / s->sps->ctb_width) << s->sps->log2_ctb_size;
2380
2381         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2382
2383         ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
2384
2385         if (avpriv_atomic_int_get(&s1->wpp_err)){
2386             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2387             return 0;
2388         }
2389
2390         ff_hevc_cabac_init(s, ctb_addr_ts);
2391         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2392         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2393
2394         if (more_data < 0) {
2395             s->tab_slice_address[ctb_addr_rs] = -1;
2396             return more_data;
2397         }
2398
2399         ctb_addr_ts++;
2400
2401         ff_hevc_save_states(s, ctb_addr_ts);
2402         ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2403         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2404
2405         if (!more_data && (x_ctb+ctb_size) < s->sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2406             avpriv_atomic_int_set(&s1->wpp_err,  1);
2407             ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2408             return 0;
2409         }
2410
2411         if ((x_ctb+ctb_size) >= s->sps->width && (y_ctb+ctb_size) >= s->sps->height ) {
2412             ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2413             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2414             return ctb_addr_ts;
2415         }
2416         ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2417         x_ctb+=ctb_size;
2418
2419         if(x_ctb >= s->sps->width) {
2420             break;
2421         }
2422     }
2423     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2424
2425     return 0;
2426 }
2427
2428 static int hls_slice_data_wpp(HEVCContext *s, const uint8_t *nal, int length)
2429 {
2430     HEVCLocalContext *lc = s->HEVClc;
2431     int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2432     int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2433     int offset;
2434     int startheader, cmpt = 0;
2435     int i, j, res = 0;
2436
2437     if (!ret || !arg) {
2438         av_free(ret);
2439         av_free(arg);
2440         return AVERROR(ENOMEM);
2441     }
2442
2443
2444     if (!s->sList[1]) {
2445         ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
2446
2447
2448         for (i = 1; i < s->threads_number; i++) {
2449             s->sList[i] = av_malloc(sizeof(HEVCContext));
2450             memcpy(s->sList[i], s, sizeof(HEVCContext));
2451             s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2452             s->sList[i]->HEVClc = s->HEVClcList[i];
2453         }
2454     }
2455
2456     offset = (lc->gb.index >> 3);
2457
2458     for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < s->skipped_bytes; j++) {
2459         if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2460             startheader--;
2461             cmpt++;
2462         }
2463     }
2464
2465     for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2466         offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2467         for (j = 0, cmpt = 0, startheader = offset
2468              + s->sh.entry_point_offset[i]; j < s->skipped_bytes; j++) {
2469             if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2470                 startheader--;
2471                 cmpt++;
2472             }
2473         }
2474         s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2475         s->sh.offset[i - 1] = offset;
2476
2477     }
2478     if (s->sh.num_entry_point_offsets != 0) {
2479         offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2480         s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2481         s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
2482
2483     }
2484     s->data = nal;
2485
2486     for (i = 1; i < s->threads_number; i++) {
2487         s->sList[i]->HEVClc->first_qp_group = 1;
2488         s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2489         memcpy(s->sList[i], s, sizeof(HEVCContext));
2490         s->sList[i]->HEVClc = s->HEVClcList[i];
2491     }
2492
2493     avpriv_atomic_int_set(&s->wpp_err, 0);
2494     ff_reset_entries(s->avctx);
2495
2496     for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2497         arg[i] = i;
2498         ret[i] = 0;
2499     }
2500
2501     if (s->pps->entropy_coding_sync_enabled_flag)
2502         s->avctx->execute2(s->avctx, (void *) hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2503
2504     for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
2505         res += ret[i];
2506     av_free(ret);
2507     av_free(arg);
2508     return res;
2509 }
2510
2511 /**
2512  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2513  * 0 if the unit should be skipped, 1 otherwise
2514  */
2515 static int hls_nal_unit(HEVCContext *s)
2516 {
2517     GetBitContext *gb = &s->HEVClc->gb;
2518     int nuh_layer_id;
2519
2520     if (get_bits1(gb) != 0)
2521         return AVERROR_INVALIDDATA;
2522
2523     s->nal_unit_type = get_bits(gb, 6);
2524
2525     nuh_layer_id   = get_bits(gb, 6);
2526     s->temporal_id = get_bits(gb, 3) - 1;
2527     if (s->temporal_id < 0)
2528         return AVERROR_INVALIDDATA;
2529
2530     av_log(s->avctx, AV_LOG_DEBUG,
2531            "nal_unit_type: %d, nuh_layer_id: %d, temporal_id: %d\n",
2532            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2533
2534     return nuh_layer_id == 0;
2535 }
2536
2537 static int set_side_data(HEVCContext *s)
2538 {
2539     AVFrame *out = s->ref->frame;
2540
2541     if (s->sei_frame_packing_present &&
2542         s->frame_packing_arrangement_type >= 3 &&
2543         s->frame_packing_arrangement_type <= 5 &&
2544         s->content_interpretation_type > 0 &&
2545         s->content_interpretation_type < 3) {
2546         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2547         if (!stereo)
2548             return AVERROR(ENOMEM);
2549
2550         switch (s->frame_packing_arrangement_type) {
2551         case 3:
2552             if (s->quincunx_subsampling)
2553                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2554             else
2555                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2556             break;
2557         case 4:
2558             stereo->type = AV_STEREO3D_TOPBOTTOM;
2559             break;
2560         case 5:
2561             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2562             break;
2563         }
2564
2565         if (s->content_interpretation_type == 2)
2566             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2567     }
2568
2569     if (s->sei_display_orientation_present &&
2570         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2571         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2572         AVFrameSideData *rotation = av_frame_new_side_data(out,
2573                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2574                                                            sizeof(int32_t) * 9);
2575         if (!rotation)
2576             return AVERROR(ENOMEM);
2577
2578         av_display_rotation_set((int32_t *)rotation->data, angle);
2579         av_display_matrix_flip((int32_t *)rotation->data,
2580                                s->sei_hflip, s->sei_vflip);
2581     }
2582
2583     return 0;
2584 }
2585
2586 static int hevc_frame_start(HEVCContext *s)
2587 {
2588     HEVCLocalContext *lc = s->HEVClc;
2589     int pic_size_in_ctb  = ((s->sps->width  >> s->sps->log2_min_cb_size) + 1) *
2590                            ((s->sps->height >> s->sps->log2_min_cb_size) + 1);
2591     int ret;
2592
2593     memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2594     memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
2595     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2596     memset(s->is_pcm,        0, (s->sps->min_pu_width + 1) * (s->sps->min_pu_height + 1));
2597     memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2598
2599     s->is_decoded        = 0;
2600     s->first_nal_type    = s->nal_unit_type;
2601
2602     if (s->pps->tiles_enabled_flag)
2603         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2604
2605     ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2606     if (ret < 0)
2607         goto fail;
2608
2609     ret = ff_hevc_frame_rps(s);
2610     if (ret < 0) {
2611         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2612         goto fail;
2613     }
2614
2615     s->ref->frame->key_frame = IS_IRAP(s);
2616
2617     ret = set_side_data(s);
2618     if (ret < 0)
2619         goto fail;
2620
2621     s->frame->pict_type = 3 - s->sh.slice_type;
2622
2623     if (!IS_IRAP(s))
2624         ff_hevc_bump_frame(s);
2625
2626     av_frame_unref(s->output_frame);
2627     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2628     if (ret < 0)
2629         goto fail;
2630
2631     ff_thread_finish_setup(s->avctx);
2632
2633     return 0;
2634
2635 fail:
2636     if (s->ref)
2637         ff_hevc_unref_frame(s, s->ref, ~0);
2638     s->ref = NULL;
2639     return ret;
2640 }
2641
2642 static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
2643 {
2644     HEVCLocalContext *lc = s->HEVClc;
2645     GetBitContext *gb    = &lc->gb;
2646     int ctb_addr_ts, ret;
2647
2648     ret = init_get_bits8(gb, nal->data, nal->size);
2649     if (ret < 0)
2650         return ret;
2651
2652     ret = hls_nal_unit(s);
2653     if (ret < 0) {
2654         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2655                s->nal_unit_type);
2656         goto fail;
2657     } else if (!ret)
2658         return 0;
2659
2660     switch (s->nal_unit_type) {
2661     case NAL_VPS:
2662         ret = ff_hevc_decode_nal_vps(s);
2663         if (ret < 0)
2664             goto fail;
2665         break;
2666     case NAL_SPS:
2667         ret = ff_hevc_decode_nal_sps(s);
2668         if (ret < 0)
2669             goto fail;
2670         break;
2671     case NAL_PPS:
2672         ret = ff_hevc_decode_nal_pps(s);
2673         if (ret < 0)
2674             goto fail;
2675         break;
2676     case NAL_SEI_PREFIX:
2677     case NAL_SEI_SUFFIX:
2678         ret = ff_hevc_decode_nal_sei(s);
2679         if (ret < 0)
2680             goto fail;
2681         break;
2682     case NAL_TRAIL_R:
2683     case NAL_TRAIL_N:
2684     case NAL_TSA_N:
2685     case NAL_TSA_R:
2686     case NAL_STSA_N:
2687     case NAL_STSA_R:
2688     case NAL_BLA_W_LP:
2689     case NAL_BLA_W_RADL:
2690     case NAL_BLA_N_LP:
2691     case NAL_IDR_W_RADL:
2692     case NAL_IDR_N_LP:
2693     case NAL_CRA_NUT:
2694     case NAL_RADL_N:
2695     case NAL_RADL_R:
2696     case NAL_RASL_N:
2697     case NAL_RASL_R:
2698         ret = hls_slice_header(s);
2699         if (ret < 0)
2700             return ret;
2701
2702         if (s->max_ra == INT_MAX) {
2703             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2704                 s->max_ra = s->poc;
2705             } else {
2706                 if (IS_IDR(s))
2707                     s->max_ra = INT_MIN;
2708             }
2709         }
2710
2711         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2712             s->poc <= s->max_ra) {
2713             s->is_decoded = 0;
2714             break;
2715         } else {
2716             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2717                 s->max_ra = INT_MIN;
2718         }
2719
2720         if (s->sh.first_slice_in_pic_flag) {
2721             ret = hevc_frame_start(s);
2722             if (ret < 0)
2723                 return ret;
2724         } else if (!s->ref) {
2725             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2726             goto fail;
2727         }
2728
2729         if (s->nal_unit_type != s->first_nal_type) {
2730             av_log(s->avctx, AV_LOG_ERROR,
2731                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2732                    s->first_nal_type, s->nal_unit_type);
2733             return AVERROR_INVALIDDATA;
2734         }
2735
2736         if (!s->sh.dependent_slice_segment_flag &&
2737             s->sh.slice_type != I_SLICE) {
2738             ret = ff_hevc_slice_rpl(s);
2739             if (ret < 0) {
2740                 av_log(s->avctx, AV_LOG_WARNING,
2741                        "Error constructing the reference lists for the current slice.\n");
2742                 goto fail;
2743             }
2744         }
2745
2746         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2747             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2748             if (ret < 0)
2749                 goto fail;
2750         }
2751
2752         if (s->avctx->hwaccel) {
2753             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
2754             if (ret < 0)
2755                 goto fail;
2756         } else {
2757             if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2758                 ctb_addr_ts = hls_slice_data_wpp(s, nal->data, nal->size);
2759             else
2760                 ctb_addr_ts = hls_slice_data(s);
2761             if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2762                 s->is_decoded = 1;
2763             }
2764
2765             if (ctb_addr_ts < 0) {
2766                 ret = ctb_addr_ts;
2767                 goto fail;
2768             }
2769         }
2770         break;
2771     case NAL_EOS_NUT:
2772     case NAL_EOB_NUT:
2773         s->seq_decode = (s->seq_decode + 1) & 0xff;
2774         s->max_ra     = INT_MAX;
2775         break;
2776     case NAL_AUD:
2777     case NAL_FD_NUT:
2778         break;
2779     default:
2780         av_log(s->avctx, AV_LOG_INFO,
2781                "Skipping NAL unit %d\n", s->nal_unit_type);
2782     }
2783
2784     return 0;
2785 fail:
2786     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2787         return ret;
2788     return 0;
2789 }
2790
2791 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2792  * between these functions would be nice. */
2793 int ff_hevc_extract_rbsp(HEVCContext *s, const uint8_t *src, int length,
2794                          HEVCNAL *nal)
2795 {
2796     int i, si, di;
2797     uint8_t *dst;
2798
2799     s->skipped_bytes = 0;
2800 #define STARTCODE_TEST                                                  \
2801         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
2802             if (src[i + 2] != 3) {                                      \
2803                 /* startcode, so we must be past the end */             \
2804                 length = i;                                             \
2805             }                                                           \
2806             break;                                                      \
2807         }
2808 #if HAVE_FAST_UNALIGNED
2809 #define FIND_FIRST_ZERO                                                 \
2810         if (i > 0 && !src[i])                                           \
2811             i--;                                                        \
2812         while (src[i])                                                  \
2813             i++
2814 #if HAVE_FAST_64BIT
2815     for (i = 0; i + 1 < length; i += 9) {
2816         if (!((~AV_RN64A(src + i) &
2817                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2818               0x8000800080008080ULL))
2819             continue;
2820         FIND_FIRST_ZERO;
2821         STARTCODE_TEST;
2822         i -= 7;
2823     }
2824 #else
2825     for (i = 0; i + 1 < length; i += 5) {
2826         if (!((~AV_RN32A(src + i) &
2827                (AV_RN32A(src + i) - 0x01000101U)) &
2828               0x80008080U))
2829             continue;
2830         FIND_FIRST_ZERO;
2831         STARTCODE_TEST;
2832         i -= 3;
2833     }
2834 #endif /* HAVE_FAST_64BIT */
2835 #else
2836     for (i = 0; i + 1 < length; i += 2) {
2837         if (src[i])
2838             continue;
2839         if (i > 0 && src[i - 1] == 0)
2840             i--;
2841         STARTCODE_TEST;
2842     }
2843 #endif /* HAVE_FAST_UNALIGNED */
2844
2845     if (i >= length - 1) { // no escaped 0
2846         nal->data     =
2847         nal->raw_data = src;
2848         nal->size     =
2849         nal->raw_size = length;
2850         return length;
2851     }
2852
2853     av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2854                    length + FF_INPUT_BUFFER_PADDING_SIZE);
2855     if (!nal->rbsp_buffer)
2856         return AVERROR(ENOMEM);
2857
2858     dst = nal->rbsp_buffer;
2859
2860     memcpy(dst, src, i);
2861     si = di = i;
2862     while (si + 2 < length) {
2863         // remove escapes (very rare 1:2^22)
2864         if (src[si + 2] > 3) {
2865             dst[di++] = src[si++];
2866             dst[di++] = src[si++];
2867         } else if (src[si] == 0 && src[si + 1] == 0) {
2868             if (src[si + 2] == 3) { // escape
2869                 dst[di++] = 0;
2870                 dst[di++] = 0;
2871                 si       += 3;
2872
2873                 s->skipped_bytes++;
2874                 if (s->skipped_bytes_pos_size < s->skipped_bytes) {
2875                     s->skipped_bytes_pos_size *= 2;
2876                     av_reallocp_array(&s->skipped_bytes_pos,
2877                             s->skipped_bytes_pos_size,
2878                             sizeof(*s->skipped_bytes_pos));
2879                     if (!s->skipped_bytes_pos)
2880                         return AVERROR(ENOMEM);
2881                 }
2882                 if (s->skipped_bytes_pos)
2883                     s->skipped_bytes_pos[s->skipped_bytes-1] = di - 1;
2884                 continue;
2885             } else // next start code
2886                 goto nsc;
2887         }
2888
2889         dst[di++] = src[si++];
2890     }
2891     while (si < length)
2892         dst[di++] = src[si++];
2893
2894 nsc:
2895     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2896
2897     nal->data = dst;
2898     nal->size = di;
2899     nal->raw_data = src;
2900     nal->raw_size = si;
2901     return si;
2902 }
2903
2904 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2905 {
2906     int i, consumed, ret = 0;
2907
2908     s->ref = NULL;
2909     s->last_eos = s->eos;
2910     s->eos = 0;
2911
2912     /* split the input packet into NAL units, so we know the upper bound on the
2913      * number of slices in the frame */
2914     s->nb_nals = 0;
2915     while (length >= 4) {
2916         HEVCNAL *nal;
2917         int extract_length = 0;
2918
2919         if (s->is_nalff) {
2920             int i;
2921             for (i = 0; i < s->nal_length_size; i++)
2922                 extract_length = (extract_length << 8) | buf[i];
2923             buf    += s->nal_length_size;
2924             length -= s->nal_length_size;
2925
2926             if (extract_length > length) {
2927                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2928                 ret = AVERROR_INVALIDDATA;
2929                 goto fail;
2930             }
2931         } else {
2932             /* search start code */
2933             while (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2934                 ++buf;
2935                 --length;
2936                 if (length < 4) {
2937                     av_log(s->avctx, AV_LOG_ERROR, "No start code is found.\n");
2938                     ret = AVERROR_INVALIDDATA;
2939                     goto fail;
2940                 }
2941             }
2942
2943             buf           += 3;
2944             length        -= 3;
2945         }
2946
2947         if (!s->is_nalff)
2948             extract_length = length;
2949
2950         if (s->nals_allocated < s->nb_nals + 1) {
2951             int new_size = s->nals_allocated + 1;
2952             void *tmp = av_realloc_array(s->nals, new_size, sizeof(*s->nals));
2953             ret = AVERROR(ENOMEM);
2954             if (!tmp) {
2955                 goto fail;
2956             }
2957             s->nals = tmp;
2958             memset(s->nals + s->nals_allocated, 0,
2959                    (new_size - s->nals_allocated) * sizeof(*s->nals));
2960
2961             tmp = av_realloc_array(s->skipped_bytes_nal, new_size, sizeof(*s->skipped_bytes_nal));
2962             if (!tmp)
2963                 goto fail;
2964             s->skipped_bytes_nal = tmp;
2965
2966             tmp = av_realloc_array(s->skipped_bytes_pos_size_nal, new_size, sizeof(*s->skipped_bytes_pos_size_nal));
2967             if (!tmp)
2968                 goto fail;
2969             s->skipped_bytes_pos_size_nal = tmp;
2970
2971             tmp = av_realloc_array(s->skipped_bytes_pos_nal, new_size, sizeof(*s->skipped_bytes_pos_nal));
2972             if (!tmp)
2973                 goto fail;
2974             s->skipped_bytes_pos_nal = tmp;
2975
2976             s->skipped_bytes_pos_size_nal[s->nals_allocated] = 1024; // initial buffer size
2977             s->skipped_bytes_pos_nal[s->nals_allocated] = av_malloc_array(s->skipped_bytes_pos_size_nal[s->nals_allocated], sizeof(*s->skipped_bytes_pos));
2978             if (!s->skipped_bytes_pos_nal[s->nals_allocated])
2979                 goto fail;
2980             s->nals_allocated = new_size;
2981         }
2982         s->skipped_bytes_pos_size = s->skipped_bytes_pos_size_nal[s->nb_nals];
2983         s->skipped_bytes_pos = s->skipped_bytes_pos_nal[s->nb_nals];
2984         nal = &s->nals[s->nb_nals];
2985
2986         consumed = ff_hevc_extract_rbsp(s, buf, extract_length, nal);
2987
2988         s->skipped_bytes_nal[s->nb_nals] = s->skipped_bytes;
2989         s->skipped_bytes_pos_size_nal[s->nb_nals] = s->skipped_bytes_pos_size;
2990         s->skipped_bytes_pos_nal[s->nb_nals++] = s->skipped_bytes_pos;
2991
2992
2993         if (consumed < 0) {
2994             ret = consumed;
2995             goto fail;
2996         }
2997
2998         ret = init_get_bits8(&s->HEVClc->gb, nal->data, nal->size);
2999         if (ret < 0)
3000             goto fail;
3001         hls_nal_unit(s);
3002
3003         if (s->nal_unit_type == NAL_EOB_NUT ||
3004             s->nal_unit_type == NAL_EOS_NUT)
3005             s->eos = 1;
3006
3007         buf    += consumed;
3008         length -= consumed;
3009     }
3010
3011     /* parse the NAL units */
3012     for (i = 0; i < s->nb_nals; i++) {
3013         int ret;
3014         s->skipped_bytes = s->skipped_bytes_nal[i];
3015         s->skipped_bytes_pos = s->skipped_bytes_pos_nal[i];
3016
3017         ret = decode_nal_unit(s, &s->nals[i]);
3018         if (ret < 0) {
3019             av_log(s->avctx, AV_LOG_WARNING,
3020                    "Error parsing NAL unit #%d.\n", i);
3021             goto fail;
3022         }
3023     }
3024
3025 fail:
3026     if (s->ref && s->threads_type == FF_THREAD_FRAME)
3027         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
3028
3029     return ret;
3030 }
3031
/**
 * Log a 16-byte digest as 32 lowercase hex digits, without a trailing
 * newline.
 *
 * @param log_ctx context handed to av_log()
 * @param level   log level used for every byte
 * @param md5     digest to print
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *cur = md5;
    const uint8_t *end = md5 + 16;

    while (cur < end)
        av_log(log_ctx, level, "%02"PRIx8, *cur++);
}
3038
3039 static int verify_md5(HEVCContext *s, AVFrame *frame)
3040 {
3041     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
3042     int pixel_shift;
3043     int i, j;
3044
3045     if (!desc)
3046         return AVERROR(EINVAL);
3047
3048     pixel_shift = desc->comp[0].depth_minus1 > 7;
3049
3050     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
3051            s->poc);
3052
3053     /* the checksums are LE, so we have to byteswap for >8bpp formats
3054      * on BE arches */
3055 #if HAVE_BIGENDIAN
3056     if (pixel_shift && !s->checksum_buf) {
3057         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
3058                        FFMAX3(frame->linesize[0], frame->linesize[1],
3059                               frame->linesize[2]));
3060         if (!s->checksum_buf)
3061             return AVERROR(ENOMEM);
3062     }
3063 #endif
3064
3065     for (i = 0; frame->data[i]; i++) {
3066         int width  = s->avctx->coded_width;
3067         int height = s->avctx->coded_height;
3068         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
3069         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
3070         uint8_t md5[16];
3071
3072         av_md5_init(s->md5_ctx);
3073         for (j = 0; j < h; j++) {
3074             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
3075 #if HAVE_BIGENDIAN
3076             if (pixel_shift) {
3077                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
3078                                     (const uint16_t *) src, w);
3079                 src = s->checksum_buf;
3080             }
3081 #endif
3082             av_md5_update(s->md5_ctx, src, w << pixel_shift);
3083         }
3084         av_md5_final(s->md5_ctx, md5);
3085
3086         if (!memcmp(md5, s->md5[i], 16)) {
3087             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
3088             print_md5(s->avctx, AV_LOG_DEBUG, md5);
3089             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
3090         } else {
3091             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
3092             print_md5(s->avctx, AV_LOG_ERROR, md5);
3093             av_log   (s->avctx, AV_LOG_ERROR, " != ");
3094             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
3095             av_log   (s->avctx, AV_LOG_ERROR, "\n");
3096             return AVERROR_INVALIDDATA;
3097         }
3098     }
3099
3100     av_log(s->avctx, AV_LOG_DEBUG, "\n");
3101
3102     return 0;
3103 }
3104
3105 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
3106                              AVPacket *avpkt)
3107 {
3108     int ret;
3109     HEVCContext *s = avctx->priv_data;
3110
3111     if (!avpkt->size) {
3112         ret = ff_hevc_output_frame(s, data, 1);
3113         if (ret < 0)
3114             return ret;
3115
3116         *got_output = ret;
3117         return 0;
3118     }
3119
3120     s->ref = NULL;
3121     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
3122     if (ret < 0)
3123         return ret;
3124
3125     if (avctx->hwaccel) {
3126         if (s->ref && avctx->hwaccel->end_frame(avctx) < 0)
3127             av_log(avctx, AV_LOG_ERROR,
3128                    "hardware accelerator failed to decode picture\n");
3129     } else {
3130         /* verify the SEI checksum */
3131         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
3132             s->is_md5) {
3133             ret = verify_md5(s, s->ref->frame);
3134             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
3135                 ff_hevc_unref_frame(s, s->ref, ~0);
3136                 return ret;
3137             }
3138         }
3139     }
3140     s->is_md5 = 0;
3141
3142     if (s->is_decoded) {
3143         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
3144         s->is_decoded = 0;
3145     }
3146
3147     if (s->output_frame->buf[0]) {
3148         av_frame_move_ref(data, s->output_frame);
3149         *got_output = 1;
3150     }
3151
3152     return avpkt->size;
3153 }
3154
3155 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
3156 {
3157     int ret;
3158
3159     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
3160     if (ret < 0)
3161         return ret;
3162
3163     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
3164     if (!dst->tab_mvf_buf)
3165         goto fail;
3166     dst->tab_mvf = src->tab_mvf;
3167
3168     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
3169     if (!dst->rpl_tab_buf)
3170         goto fail;
3171     dst->rpl_tab = src->rpl_tab;
3172
3173     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
3174     if (!dst->rpl_buf)
3175         goto fail;
3176
3177     dst->poc        = src->poc;
3178     dst->ctb_count  = src->ctb_count;
3179     dst->window     = src->window;
3180     dst->flags      = src->flags;
3181     dst->sequence   = src->sequence;
3182
3183     if (src->hwaccel_picture_private) {
3184         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
3185         if (!dst->hwaccel_priv_buf)
3186             goto fail;
3187         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
3188     }
3189
3190     return 0;
3191 fail:
3192     ff_hevc_unref_frame(s, dst, ~0);
3193     return AVERROR(ENOMEM);
3194 }
3195
3196 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3197 {
3198     HEVCContext       *s = avctx->priv_data;
3199     int i;
3200
3201     pic_arrays_free(s);
3202
3203     av_freep(&s->md5_ctx);
3204
3205     for(i=0; i < s->nals_allocated; i++) {
3206         av_freep(&s->skipped_bytes_pos_nal[i]);
3207     }
3208     av_freep(&s->skipped_bytes_pos_size_nal);
3209     av_freep(&s->skipped_bytes_nal);
3210     av_freep(&s->skipped_bytes_pos_nal);
3211
3212     av_freep(&s->cabac_state);
3213
3214 #ifdef USE_SAO_SMALL_BUFFER
3215     for (i = 0; i < s->threads_number; i++) {
3216         av_freep(&s->HEVClcList[i]->sao_pixel_buffer);
3217     }
3218     for (i = 0; i < 3; i++) {
3219         av_freep(&s->sao_pixel_buffer_h[i]);
3220         av_freep(&s->sao_pixel_buffer_v[i]);
3221     }
3222 #else
3223     av_frame_free(&s->tmp_frame);
3224 #endif
3225     av_frame_free(&s->output_frame);
3226
3227     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3228         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3229         av_frame_free(&s->DPB[i].frame);
3230     }
3231
3232     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
3233         av_buffer_unref(&s->vps_list[i]);
3234     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
3235         av_buffer_unref(&s->sps_list[i]);
3236     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
3237         av_buffer_unref(&s->pps_list[i]);
3238     s->sps = NULL;
3239     s->pps = NULL;
3240     s->vps = NULL;
3241
3242     av_buffer_unref(&s->current_sps);
3243
3244     av_freep(&s->sh.entry_point_offset);
3245     av_freep(&s->sh.offset);
3246     av_freep(&s->sh.size);
3247
3248     for (i = 1; i < s->threads_number; i++) {
3249         HEVCLocalContext *lc = s->HEVClcList[i];
3250         if (lc) {
3251             av_freep(&s->HEVClcList[i]);
3252             av_freep(&s->sList[i]);
3253         }
3254     }
3255     if (s->HEVClc == s->HEVClcList[0])
3256         s->HEVClc = NULL;
3257     av_freep(&s->HEVClcList[0]);
3258
3259     for (i = 0; i < s->nals_allocated; i++)
3260         av_freep(&s->nals[i].rbsp_buffer);
3261     av_freep(&s->nals);
3262     s->nals_allocated = 0;
3263
3264     return 0;
3265 }
3266
3267 static av_cold int hevc_init_context(AVCodecContext *avctx)
3268 {
3269     HEVCContext *s = avctx->priv_data;
3270     int i;
3271
3272     s->avctx = avctx;
3273
3274     s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3275     if (!s->HEVClc)
3276         goto fail;
3277     s->HEVClcList[0] = s->HEVClc;
3278     s->sList[0] = s;
3279
3280     s->cabac_state = av_malloc(HEVC_CONTEXTS);
3281     if (!s->cabac_state)
3282         goto fail;
3283
3284 #ifndef USE_SAO_SMALL_BUFFER
3285     s->tmp_frame = av_frame_alloc();
3286     if (!s->tmp_frame)
3287         goto fail;
3288 #endif
3289
3290     s->output_frame = av_frame_alloc();
3291     if (!s->output_frame)
3292         goto fail;
3293
3294     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3295         s->DPB[i].frame = av_frame_alloc();
3296         if (!s->DPB[i].frame)
3297             goto fail;
3298         s->DPB[i].tf.f = s->DPB[i].frame;
3299     }
3300
3301     s->max_ra = INT_MAX;
3302
3303     s->md5_ctx = av_md5_alloc();
3304     if (!s->md5_ctx)
3305         goto fail;
3306
3307     ff_bswapdsp_init(&s->bdsp);
3308
3309     s->context_initialized = 1;
3310     s->eos = 0;
3311
3312     return 0;
3313
3314 fail:
3315     hevc_decode_free(avctx);
3316     return AVERROR(ENOMEM);
3317 }
3318
3319 static int hevc_update_thread_context(AVCodecContext *dst,
3320                                       const AVCodecContext *src)
3321 {
3322     HEVCContext *s  = dst->priv_data;
3323     HEVCContext *s0 = src->priv_data;
3324     int i, ret;
3325
3326     if (!s->context_initialized) {
3327         ret = hevc_init_context(dst);
3328         if (ret < 0)
3329             return ret;
3330     }
3331
3332     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3333         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3334         if (s0->DPB[i].frame->buf[0]) {
3335             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3336             if (ret < 0)
3337                 return ret;
3338         }
3339     }
3340
3341     if (s->sps != s0->sps)
3342         s->sps = NULL;
3343     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3344         av_buffer_unref(&s->vps_list[i]);
3345         if (s0->vps_list[i]) {
3346             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3347             if (!s->vps_list[i])
3348                 return AVERROR(ENOMEM);
3349         }
3350     }
3351
3352     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3353         av_buffer_unref(&s->sps_list[i]);
3354         if (s0->sps_list[i]) {
3355             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3356             if (!s->sps_list[i])
3357                 return AVERROR(ENOMEM);
3358         }
3359     }
3360
3361     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3362         av_buffer_unref(&s->pps_list[i]);
3363         if (s0->pps_list[i]) {
3364             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3365             if (!s->pps_list[i])
3366                 return AVERROR(ENOMEM);
3367         }
3368     }
3369
3370     av_buffer_unref(&s->current_sps);
3371     if (s0->current_sps) {
3372         s->current_sps = av_buffer_ref(s0->current_sps);
3373         if (!s->current_sps)
3374             return AVERROR(ENOMEM);
3375     }
3376
3377     if (s->sps != s0->sps)
3378         if ((ret = set_sps(s, s0->sps)) < 0)
3379             return ret;
3380
3381     s->seq_decode = s0->seq_decode;
3382     s->seq_output = s0->seq_output;
3383     s->pocTid0    = s0->pocTid0;
3384     s->max_ra     = s0->max_ra;
3385     s->eos        = s0->eos;
3386
3387     s->is_nalff        = s0->is_nalff;
3388     s->nal_length_size = s0->nal_length_size;
3389
3390     s->threads_number      = s0->threads_number;
3391     s->threads_type        = s0->threads_type;
3392
3393     if (s0->eos) {
3394         s->seq_decode = (s->seq_decode + 1) & 0xff;
3395         s->max_ra = INT_MAX;
3396     }
3397
3398     return 0;
3399 }
3400
3401 static int hevc_decode_extradata(HEVCContext *s)
3402 {
3403     AVCodecContext *avctx = s->avctx;
3404     GetByteContext gb;
3405     int ret;
3406
3407     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3408
3409     if (avctx->extradata_size > 3 &&
3410         (avctx->extradata[0] || avctx->extradata[1] ||
3411          avctx->extradata[2] > 1)) {
3412         /* It seems the extradata is encoded as hvcC format.
3413          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3414          * is finalized. When finalized, configurationVersion will be 1 and we
3415          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3416         int i, j, num_arrays, nal_len_size;
3417
3418         s->is_nalff = 1;
3419
3420         bytestream2_skip(&gb, 21);
3421         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3422         num_arrays   = bytestream2_get_byte(&gb);
3423
3424         /* nal units in the hvcC always have length coded with 2 bytes,
3425          * so put a fake nal_length_size = 2 while parsing them */
3426         s->nal_length_size = 2;
3427
3428         /* Decode nal units from hvcC. */
3429         for (i = 0; i < num_arrays; i++) {
3430             int type = bytestream2_get_byte(&gb) & 0x3f;
3431             int cnt  = bytestream2_get_be16(&gb);
3432
3433             for (j = 0; j < cnt; j++) {
3434                 // +2 for the nal size field
3435                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3436                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3437                     av_log(s->avctx, AV_LOG_ERROR,
3438                            "Invalid NAL unit size in extradata.\n");
3439                     return AVERROR_INVALIDDATA;
3440                 }
3441
3442                 ret = decode_nal_units(s, gb.buffer, nalsize);
3443                 if (ret < 0) {
3444                     av_log(avctx, AV_LOG_ERROR,
3445                            "Decoding nal unit %d %d from hvcC failed\n",
3446                            type, i);
3447                     return ret;
3448                 }
3449                 bytestream2_skip(&gb, nalsize);
3450             }
3451         }
3452
3453         /* Now store right nal length size, that will be used to parse
3454          * all other nals */
3455         s->nal_length_size = nal_len_size;
3456     } else {
3457         s->is_nalff = 0;
3458         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3459         if (ret < 0)
3460             return ret;
3461     }
3462     return 0;
3463 }
3464
3465 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3466 {
3467     HEVCContext *s = avctx->priv_data;
3468     int ret;
3469
3470     ff_init_cabac_states();
3471
3472     avctx->internal->allocate_progress = 1;
3473
3474     ret = hevc_init_context(avctx);
3475     if (ret < 0)
3476         return ret;
3477
3478     s->enable_parallel_tiles = 0;
3479     s->picture_struct = 0;
3480
3481     if(avctx->active_thread_type & FF_THREAD_SLICE)
3482         s->threads_number = avctx->thread_count;
3483     else
3484         s->threads_number = 1;
3485
3486     if (avctx->extradata_size > 0 && avctx->extradata) {
3487         ret = hevc_decode_extradata(s);
3488         if (ret < 0) {
3489             hevc_decode_free(avctx);
3490             return ret;
3491         }
3492     }
3493
3494     if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3495             s->threads_type = FF_THREAD_FRAME;
3496         else
3497             s->threads_type = FF_THREAD_SLICE;
3498
3499     return 0;
3500 }
3501
3502 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3503 {
3504     HEVCContext *s = avctx->priv_data;
3505     int ret;
3506
3507     memset(s, 0, sizeof(*s));
3508
3509     ret = hevc_init_context(avctx);
3510     if (ret < 0)
3511         return ret;
3512
3513     return 0;
3514 }
3515
3516 static void hevc_decode_flush(AVCodecContext *avctx)
3517 {
3518     HEVCContext *s = avctx->priv_data;
3519     ff_hevc_flush_dpb(s);
3520     s->max_ra = INT_MAX;
3521 }
3522
/* Byte offset of member x inside HEVCContext, used by the option table. */
#define OFFSET(x) offsetof(HEVCContext, x)
/* Flags shared by the options below: video decoding parameters. */
#define PAR (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM)
3525
3526 static const AVProfile profiles[] = {
3527     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3528     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3529     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3530     { FF_PROFILE_HEVC_REXT,                 "Rext"  },
3531     { FF_PROFILE_UNKNOWN },
3532 };
3533
3534 static const AVOption options[] = {
3535     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3536         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3537     { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3538         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3539     { NULL },
3540 };
3541
3542 static const AVClass hevc_decoder_class = {
3543     .class_name = "HEVC decoder",
3544     .item_name  = av_default_item_name,
3545     .option     = options,
3546     .version    = LIBAVUTIL_VERSION_INT,
3547 };
3548
3549 AVCodec ff_hevc_decoder = {
3550     .name                  = "hevc",
3551     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3552     .type                  = AVMEDIA_TYPE_VIDEO,
3553     .id                    = AV_CODEC_ID_HEVC,
3554     .priv_data_size        = sizeof(HEVCContext),
3555     .priv_class            = &hevc_decoder_class,
3556     .init                  = hevc_decode_init,
3557     .close                 = hevc_decode_free,
3558     .decode                = hevc_decode_frame,
3559     .flush                 = hevc_decode_flush,
3560     .update_thread_context = hevc_update_thread_context,
3561     .init_thread_copy      = hevc_init_thread_copy,
3562     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3563                              CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
3564     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
3565 };