git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c

   1 /*
   2  * HEVC video Decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of FFmpeg.
  10  *
  11  * FFmpeg is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * FFmpeg is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with FFmpeg; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/atomic.h"
  27 #include "libavutil/attributes.h"
  28 #include "libavutil/common.h"
  29 #include "libavutil/display.h"
  30 #include "libavutil/internal.h"
  31 #include "libavutil/md5.h"
  32 #include "libavutil/opt.h"
  33 #include "libavutil/pixdesc.h"
  34 #include "libavutil/stereo3d.h"
  35
  36 #include "bswapdsp.h"
  37 #include "bytestream.h"
  38 #include "cabac_functions.h"
  39 #include "golomb.h"
  40 #include "hevc.h"
  41
  42 const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
  43
  44 /**
  45  * NOTE: Each function hls_foo correspond to the function foo in the
  46  * specification (HLS stands for High Level Syntax).
  47  */
  48
  49 /**
  50  * Section 5.7
  51  */
  52
  53 /* free everything allocated  by pic_arrays_init() */
  54 static void pic_arrays_free(HEVCContext *s)
  55 {
  56     av_freep(&s->sao);
  57     av_freep(&s->deblock);
  58
  59     av_freep(&s->skip_flag);
  60     av_freep(&s->tab_ct_depth);
  61
  62     av_freep(&s->tab_ipm);
  63     av_freep(&s->cbf_luma);
  64     av_freep(&s->is_pcm);
  65
  66     av_freep(&s->qp_y_tab);
  67     av_freep(&s->tab_slice_address);
  68     av_freep(&s->filter_slice_edges);
  69
  70     av_freep(&s->horizontal_bs);
  71     av_freep(&s->vertical_bs);
  72
  73     av_freep(&s->sh.entry_point_offset);
  74     av_freep(&s->sh.size);
  75     av_freep(&s->sh.offset);
  76
  77     av_buffer_pool_uninit(&s->tab_mvf_pool);
  78     av_buffer_pool_uninit(&s->rpl_tab_pool);
  79 }
  80
  81 /* allocate arrays that depend on frame dimensions */
  82 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
  83 {
  84     int log2_min_cb_size = sps->log2_min_cb_size;
  85     int width            = sps->width;
  86     int height           = sps->height;
  87     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
  88                            ((height >> log2_min_cb_size) + 1);
  89     int ctb_count        = sps->ctb_width * sps->ctb_height;
  90     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
  91
  92     s->bs_width  = (width  >> 2) + 1;
  93     s->bs_height = (height >> 2) + 1;
  94
  95     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
  96     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
  97     if (!s->sao || !s->deblock)
  98         goto fail;
  99
 100     s->skip_flag    = av_malloc(pic_size_in_ctb);
 101     s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
 102     if (!s->skip_flag || !s->tab_ct_depth)
 103         goto fail;
 104
 105     s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
 106     s->tab_ipm  = av_mallocz(min_pu_size);
 107     s->is_pcm   = av_malloc((sps->min_pu_width + 1) * (sps->min_pu_height + 1));
 108     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 109         goto fail;
 110
 111     s->filter_slice_edges = av_malloc(ctb_count);
 112     s->tab_slice_address  = av_malloc_array(pic_size_in_ctb,
 113                                       sizeof(*s->tab_slice_address));
 114     s->qp_y_tab           = av_malloc_array(pic_size_in_ctb,
 115                                       sizeof(*s->qp_y_tab));
 116     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 117         goto fail;
 118
 119     s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
 120     s->vertical_bs   = av_mallocz_array(s->bs_width, s->bs_height);
 121     if (!s->horizontal_bs || !s->vertical_bs)
 122         goto fail;
 123
 124     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 125                                           av_buffer_allocz);
 126     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 127                                           av_buffer_allocz);
 128     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 129         goto fail;
 130
 131     return 0;
 132
 133 fail:
 134     pic_arrays_free(s);
 135     return AVERROR(ENOMEM);
 136 }
 137
 138 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 139 {
 140     int i = 0;
 141     int j = 0;
 142     uint8_t luma_weight_l0_flag[16];
 143     uint8_t chroma_weight_l0_flag[16];
 144     uint8_t luma_weight_l1_flag[16];
 145     uint8_t chroma_weight_l1_flag[16];
 146
 147     s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
 148     if (s->sps->chroma_format_idc != 0) {
 149         int delta = get_se_golomb(gb);
 150         s->sh.chroma_log2_weight_denom = av_clip(s->sh.luma_log2_weight_denom + delta, 0, 7);
 151     }
 152
 153     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 154         luma_weight_l0_flag[i] = get_bits1(gb);
 155         if (!luma_weight_l0_flag[i]) {
 156             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 157             s->sh.luma_offset_l0[i] = 0;
 158         }
 159     }
 160     if (s->sps->chroma_format_idc != 0) {
 161         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 162             chroma_weight_l0_flag[i] = get_bits1(gb);
 163     } else {
 164         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 165             chroma_weight_l0_flag[i] = 0;
 166     }
 167     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 168         if (luma_weight_l0_flag[i]) {
 169             int delta_luma_weight_l0 = get_se_golomb(gb);
 170             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 171             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 172         }
 173         if (chroma_weight_l0_flag[i]) {
 174             for (j = 0; j < 2; j++) {
 175                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 176                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 177                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 178                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 179                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 180             }
 181         } else {
 182             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 183             s->sh.chroma_offset_l0[i][0] = 0;
 184             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 185             s->sh.chroma_offset_l0[i][1] = 0;
 186         }
 187     }
 188     if (s->sh.slice_type == B_SLICE) {
 189         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 190             luma_weight_l1_flag[i] = get_bits1(gb);
 191             if (!luma_weight_l1_flag[i]) {
 192                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 193                 s->sh.luma_offset_l1[i] = 0;
 194             }
 195         }
 196         if (s->sps->chroma_format_idc != 0) {
 197             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 198                 chroma_weight_l1_flag[i] = get_bits1(gb);
 199         } else {
 200             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 201                 chroma_weight_l1_flag[i] = 0;
 202         }
 203         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 204             if (luma_weight_l1_flag[i]) {
 205                 int delta_luma_weight_l1 = get_se_golomb(gb);
 206                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 207                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 208             }
 209             if (chroma_weight_l1_flag[i]) {
 210                 for (j = 0; j < 2; j++) {
 211                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 212                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 213                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 214                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 215                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 216                 }
 217             } else {
 218                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 219                 s->sh.chroma_offset_l1[i][0] = 0;
 220                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 221                 s->sh.chroma_offset_l1[i][1] = 0;
 222             }
 223         }
 224     }
 225 }
 226
 227 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 228 {
 229     const HEVCSPS *sps = s->sps;
 230     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 231     int prev_delta_msb = 0;
 232     unsigned int nb_sps = 0, nb_sh;
 233     int i;
 234
 235     rps->nb_refs = 0;
 236     if (!sps->long_term_ref_pics_present_flag)
 237         return 0;
 238
 239     if (sps->num_long_term_ref_pics_sps > 0)
 240         nb_sps = get_ue_golomb_long(gb);
 241     nb_sh = get_ue_golomb_long(gb);
 242
 243     if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
 244         return AVERROR_INVALIDDATA;
 245
 246     rps->nb_refs = nb_sh + nb_sps;
 247
 248     for (i = 0; i < rps->nb_refs; i++) {
 249         uint8_t delta_poc_msb_present;
 250
 251         if (i < nb_sps) {
 252             uint8_t lt_idx_sps = 0;
 253
 254             if (sps->num_long_term_ref_pics_sps > 1)
 255                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 256
 257             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 258             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 259         } else {
 260             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 261             rps->used[i] = get_bits1(gb);
 262         }
 263
 264         delta_poc_msb_present = get_bits1(gb);
 265         if (delta_poc_msb_present) {
 266             int delta = get_ue_golomb_long(gb);
 267
 268             if (i && i != nb_sps)
 269                 delta += prev_delta_msb;
 270
 271             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 272             prev_delta_msb = delta;
 273         }
 274     }
 275
 276     return 0;
 277 }
 278
 279 static int get_buffer_sao(HEVCContext *s, AVFrame *frame, HEVCSPS *sps)
 280 {
 281     int ret, i;
 282
 283     frame->width  = s->avctx->width  + 2;
 284     frame->height = s->avctx->height + 2;
 285     if ((ret = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
 286         return ret;
 287     for (i = 0; frame->data[i]; i++) {
 288         int offset = frame->linesize[i] + (1 << sps->pixel_shift);
 289         frame->data[i] += offset;
 290     }
 291     frame->width  = s->avctx->width;
 292     frame->height = s->avctx->height;
 293
 294     return 0;
 295 }
 296
 297 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
 298 {
 299     int ret;
 300     unsigned int num = 0, den = 0;
 301
 302     pic_arrays_free(s);
 303     ret = pic_arrays_init(s, sps);
 304     if (ret < 0)
 305         goto fail;
 306
 307     s->avctx->coded_width         = sps->width;
 308     s->avctx->coded_height        = sps->height;
 309     s->avctx->width               = sps->output_width;
 310     s->avctx->height              = sps->output_height;
 311     s->avctx->pix_fmt             = sps->pix_fmt;
 312     s->avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 313
 314     ff_set_sar(s->avctx, sps->vui.sar);
 315
 316     if (sps->vui.video_signal_type_present_flag)
 317         s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 318                                                                : AVCOL_RANGE_MPEG;
 319     else
 320         s->avctx->color_range = AVCOL_RANGE_MPEG;
 321
 322     if (sps->vui.colour_description_present_flag) {
 323         s->avctx->color_primaries = sps->vui.colour_primaries;
 324         s->avctx->color_trc       = sps->vui.transfer_characteristic;
 325         s->avctx->colorspace      = sps->vui.matrix_coeffs;
 326     } else {
 327         s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 328         s->avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 329         s->avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 330     }
 331
 332     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 333     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 334     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 335
 336     if (sps->sao_enabled) {
 337         av_frame_unref(s->tmp_frame);
 338         ret = get_buffer_sao(s, s->tmp_frame, sps);
 339         s->sao_frame = s->tmp_frame;
 340     }
 341
 342     s->sps = sps;
 343     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
 344
 345     if (s->vps->vps_timing_info_present_flag) {
 346         num = s->vps->vps_num_units_in_tick;
 347         den = s->vps->vps_time_scale;
 348     } else if (sps->vui.vui_timing_info_present_flag) {
 349         num = sps->vui.vui_num_units_in_tick;
 350         den = sps->vui.vui_time_scale;
 351     }
 352
 353     if (num != 0 && den != 0)
 354         av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
 355                   num, den, 1 << 30);
 356
 357     return 0;
 358
 359 fail:
 360     pic_arrays_free(s);
 361     s->sps = NULL;
 362     return ret;
 363 }
 364
 365 static int hls_slice_header(HEVCContext *s)
 366 {
 367     GetBitContext *gb = &s->HEVClc->gb;
 368     SliceHeader *sh   = &s->sh;
 369     int i, j, ret;
 370
 371     // Coded parameters
 372     sh->first_slice_in_pic_flag = get_bits1(gb);
 373     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 374         s->seq_decode = (s->seq_decode + 1) & 0xff;
 375         s->max_ra     = INT_MAX;
 376         if (IS_IDR(s))
 377             ff_hevc_clear_refs(s);
 378     }
 379     sh->no_output_of_prior_pics_flag = 0;
 380     if (IS_IRAP(s))
 381         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 382
 383     sh->pps_id = get_ue_golomb_long(gb);
 384     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
 385         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 386         return AVERROR_INVALIDDATA;
 387     }
 388     if (!sh->first_slice_in_pic_flag &&
 389         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
 390         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 391         return AVERROR_INVALIDDATA;
 392     }
 393     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
 394     if (s->nal_unit_type == NAL_CRA_NUT && s->last_eos == 1)
 395         sh->no_output_of_prior_pics_flag = 1;
 396
 397     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
 398         const HEVCSPS* last_sps = s->sps;
 399         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
 400         if (last_sps && IS_IRAP(s) && s->nal_unit_type != NAL_CRA_NUT) {
 401             if (s->sps->width !=  last_sps->width || s->sps->height != last_sps->height ||
 402                 s->sps->temporal_layer[s->sps->max_sub_layers - 1].max_dec_pic_buffering !=
 403                 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
 404                 sh->no_output_of_prior_pics_flag = 0;
 405         }
 406         ff_hevc_clear_refs(s);
 407         ret = set_sps(s, s->sps);
 408         if (ret < 0)
 409             return ret;
 410
 411         s->seq_decode = (s->seq_decode + 1) & 0xff;
 412         s->max_ra     = INT_MAX;
 413     }
 414
 415     s->avctx->profile = s->sps->ptl.general_ptl.profile_idc;
 416     s->avctx->level   = s->sps->ptl.general_ptl.level_idc;
 417
 418     sh->dependent_slice_segment_flag = 0;
 419     if (!sh->first_slice_in_pic_flag) {
 420         int slice_address_length;
 421
 422         if (s->pps->dependent_slice_segments_enabled_flag)
 423             sh->dependent_slice_segment_flag = get_bits1(gb);
 424
 425         slice_address_length = av_ceil_log2(s->sps->ctb_width *
 426                                             s->sps->ctb_height);
 427         sh->slice_segment_addr = get_bits(gb, slice_address_length);
 428         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
 429             av_log(s->avctx, AV_LOG_ERROR,
 430                    "Invalid slice segment address: %u.\n",
 431                    sh->slice_segment_addr);
 432             return AVERROR_INVALIDDATA;
 433         }
 434
 435         if (!sh->dependent_slice_segment_flag) {
 436             sh->slice_addr = sh->slice_segment_addr;
 437             s->slice_idx++;
 438         }
 439     } else {
 440         sh->slice_segment_addr = sh->slice_addr = 0;
 441         s->slice_idx           = 0;
 442         s->slice_initialized   = 0;
 443     }
 444
 445     if (!sh->dependent_slice_segment_flag) {
 446         s->slice_initialized = 0;
 447
 448         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
 449             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 450
 451         sh->slice_type = get_ue_golomb_long(gb);
 452         if (!(sh->slice_type == I_SLICE ||
 453               sh->slice_type == P_SLICE ||
 454               sh->slice_type == B_SLICE)) {
 455             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 456                    sh->slice_type);
 457             return AVERROR_INVALIDDATA;
 458         }
 459         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
 460             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 461             return AVERROR_INVALIDDATA;
 462         }
 463
 464         // when flag is not present, picture is inferred to be output
 465         sh->pic_output_flag = 1;
 466         if (s->pps->output_flag_present_flag)
 467             sh->pic_output_flag = get_bits1(gb);
 468
 469         if (s->sps->separate_colour_plane_flag)
 470             sh->colour_plane_id = get_bits(gb, 2);
 471
 472         if (!IS_IDR(s)) {
 473             int short_term_ref_pic_set_sps_flag, poc;
 474
 475             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
 476             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
 477             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 478                 av_log(s->avctx, AV_LOG_WARNING,
 479                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 480                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 481                     return AVERROR_INVALIDDATA;
 482                 poc = s->poc;
 483             }
 484             s->poc = poc;
 485
 486             short_term_ref_pic_set_sps_flag = get_bits1(gb);
 487             if (!short_term_ref_pic_set_sps_flag) {
 488                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
 489                 if (ret < 0)
 490                     return ret;
 491
 492                 sh->short_term_rps = &sh->slice_rps;
 493             } else {
 494                 int numbits, rps_idx;
 495
 496                 if (!s->sps->nb_st_rps) {
 497                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 498                     return AVERROR_INVALIDDATA;
 499                 }
 500
 501                 numbits = av_ceil_log2(s->sps->nb_st_rps);
 502                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 503                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
 504             }
 505
 506             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 507             if (ret < 0) {
 508                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 509                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 510                     return AVERROR_INVALIDDATA;
 511             }
 512
 513             if (s->sps->sps_temporal_mvp_enabled_flag)
 514                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 515             else
 516                 sh->slice_temporal_mvp_enabled_flag = 0;
 517         } else {
 518             s->sh.short_term_rps = NULL;
 519             s->poc               = 0;
 520         }
 521
 522         /* 8.3.1 */
 523         if (s->temporal_id == 0 &&
 524             s->nal_unit_type != NAL_TRAIL_N &&
 525             s->nal_unit_type != NAL_TSA_N   &&
 526             s->nal_unit_type != NAL_STSA_N  &&
 527             s->nal_unit_type != NAL_RADL_N  &&
 528             s->nal_unit_type != NAL_RADL_R  &&
 529             s->nal_unit_type != NAL_RASL_N  &&
 530             s->nal_unit_type != NAL_RASL_R)
 531             s->pocTid0 = s->poc;
 532
 533         if (s->sps->sao_enabled) {
 534             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 535             sh->slice_sample_adaptive_offset_flag[1] =
 536             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 537         } else {
 538             sh->slice_sample_adaptive_offset_flag[0] = 0;
 539             sh->slice_sample_adaptive_offset_flag[1] = 0;
 540             sh->slice_sample_adaptive_offset_flag[2] = 0;
 541         }
 542
 543         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 544         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
 545             int nb_refs;
 546
 547             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
 548             if (sh->slice_type == B_SLICE)
 549                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
 550
 551             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 552                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 553                 if (sh->slice_type == B_SLICE)
 554                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 555             }
 556             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
 557                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 558                        sh->nb_refs[L0], sh->nb_refs[L1]);
 559                 return AVERROR_INVALIDDATA;
 560             }
 561
 562             sh->rpl_modification_flag[0] = 0;
 563             sh->rpl_modification_flag[1] = 0;
 564             nb_refs = ff_hevc_frame_nb_refs(s);
 565             if (!nb_refs) {
 566                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 567                 return AVERROR_INVALIDDATA;
 568             }
 569
 570             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
 571                 sh->rpl_modification_flag[0] = get_bits1(gb);
 572                 if (sh->rpl_modification_flag[0]) {
 573                     for (i = 0; i < sh->nb_refs[L0]; i++)
 574                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 575                 }
 576
 577                 if (sh->slice_type == B_SLICE) {
 578                     sh->rpl_modification_flag[1] = get_bits1(gb);
 579                     if (sh->rpl_modification_flag[1] == 1)
 580                         for (i = 0; i < sh->nb_refs[L1]; i++)
 581                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 582                 }
 583             }
 584
 585             if (sh->slice_type == B_SLICE)
 586                 sh->mvd_l1_zero_flag = get_bits1(gb);
 587
 588             if (s->pps->cabac_init_present_flag)
 589                 sh->cabac_init_flag = get_bits1(gb);
 590             else
 591                 sh->cabac_init_flag = 0;
 592
 593             sh->collocated_ref_idx = 0;
 594             if (sh->slice_temporal_mvp_enabled_flag) {
 595                 sh->collocated_list = L0;
 596                 if (sh->slice_type == B_SLICE)
 597                     sh->collocated_list = !get_bits1(gb);
 598
 599                 if (sh->nb_refs[sh->collocated_list] > 1) {
 600                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 601                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 602                         av_log(s->avctx, AV_LOG_ERROR,
 603                                "Invalid collocated_ref_idx: %d.\n",
 604                                sh->collocated_ref_idx);
 605                         return AVERROR_INVALIDDATA;
 606                     }
 607                 }
 608             }
 609
 610             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
 611                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
 612                 pred_weight_table(s, gb);
 613             }
 614
 615             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 616             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 617                 av_log(s->avctx, AV_LOG_ERROR,
 618                        "Invalid number of merging MVP candidates: %d.\n",
 619                        sh->max_num_merge_cand);
 620                 return AVERROR_INVALIDDATA;
 621             }
 622         }
 623
 624         sh->slice_qp_delta = get_se_golomb(gb);
 625
 626         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 627             sh->slice_cb_qp_offset = get_se_golomb(gb);
 628             sh->slice_cr_qp_offset = get_se_golomb(gb);
 629         } else {
 630             sh->slice_cb_qp_offset = 0;
 631             sh->slice_cr_qp_offset = 0;
 632         }
 633
 634         if (s->pps->chroma_qp_offset_list_enabled_flag)
 635             sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
 636         else
 637             sh->cu_chroma_qp_offset_enabled_flag = 0;
 638
 639         if (s->pps->deblocking_filter_control_present_flag) {
 640             int deblocking_filter_override_flag = 0;
 641
 642             if (s->pps->deblocking_filter_override_enabled_flag)
 643                 deblocking_filter_override_flag = get_bits1(gb);
 644
 645             if (deblocking_filter_override_flag) {
 646                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 647                 if (!sh->disable_deblocking_filter_flag) {
 648                     sh->beta_offset = get_se_golomb(gb) * 2;
 649                     sh->tc_offset   = get_se_golomb(gb) * 2;
 650                 }
 651             } else {
 652                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
 653                 sh->beta_offset                    = s->pps->beta_offset;
 654                 sh->tc_offset                      = s->pps->tc_offset;
 655             }
 656         } else {
 657             sh->disable_deblocking_filter_flag = 0;
 658             sh->beta_offset                    = 0;
 659             sh->tc_offset                      = 0;
 660         }
 661
 662         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
 663             (sh->slice_sample_adaptive_offset_flag[0] ||
 664              sh->slice_sample_adaptive_offset_flag[1] ||
 665              !sh->disable_deblocking_filter_flag)) {
 666             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 667         } else {
 668             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
 669         }
 670     } else if (!s->slice_initialized) {
 671         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 672         return AVERROR_INVALIDDATA;
 673     }
 674
 675     sh->num_entry_point_offsets = 0;
 676     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
 677         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
 678         if (sh->num_entry_point_offsets > 0) {
 679             int offset_len = get_ue_golomb_long(gb) + 1;
 680             int segments = offset_len >> 4;
 681             int rest = (offset_len & 15);
 682             av_freep(&sh->entry_point_offset);
 683             av_freep(&sh->offset);
 684             av_freep(&sh->size);
 685             sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 686             sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 687             sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 688             if (!sh->entry_point_offset || !sh->offset || !sh->size) {
 689                 sh->num_entry_point_offsets = 0;
 690                 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
 691                 return AVERROR(ENOMEM);
 692             }
 693             for (i = 0; i < sh->num_entry_point_offsets; i++) {
 694                 int val = 0;
 695                 for (j = 0; j < segments; j++) {
 696                     val <<= 16;
 697                     val += get_bits(gb, 16);
 698                 }
 699                 if (rest) {
 700                     val <<= rest;
 701                     val += get_bits(gb, rest);
 702                 }
 703                 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
 704             }
 705             if (s->threads_number > 1 && (s->pps->num_tile_rows > 1 || s->pps->num_tile_columns > 1)) {
 706                 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
 707                 s->threads_number = 1;
 708             } else
 709                 s->enable_parallel_tiles = 0;
 710         } else
 711             s->enable_parallel_tiles = 0;
 712     }
 713
 714     if (s->pps->slice_header_extension_present_flag) {
 715         unsigned int length = get_ue_golomb_long(gb);
 716         if (length*8LL > get_bits_left(gb)) {
 717             av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
 718             return AVERROR_INVALIDDATA;
 719         }
 720         for (i = 0; i < length; i++)
 721             skip_bits(gb, 8);  // slice_header_extension_data_byte
 722     }
 723
 724     // Inferred parameters
 725     sh->slice_qp = 26U + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 726     if (sh->slice_qp > 51 ||
 727         sh->slice_qp < -s->sps->qp_bd_offset) {
 728         av_log(s->avctx, AV_LOG_ERROR,
 729                "The slice_qp %d is outside the valid range "
 730                "[%d, 51].\n",
 731                sh->slice_qp,
 732                -s->sps->qp_bd_offset);
 733         return AVERROR_INVALIDDATA;
 734     }
 735
 736     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 737
 738     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
 739         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
 740         return AVERROR_INVALIDDATA;
 741     }
 742
 743     s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
 744
 745     if (!s->pps->cu_qp_delta_enabled_flag)
 746         s->HEVClc->qp_y = s->sh.slice_qp;
 747
 748     s->slice_initialized = 1;
 749     s->HEVClc->tu.cu_qp_offset_cb = 0;
 750     s->HEVClc->tu.cu_qp_offset_cr = 0;
 751
 752     return 0;
 753 }
 754
 755 #define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
 756
 757 #define SET_SAO(elem, value)                            \
 758 do {                                                    \
 759     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 760         sao->elem = value;                              \
 761     else if (sao_merge_left_flag)                       \
 762         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 763     else if (sao_merge_up_flag)                         \
 764         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 765     else                                                \
 766         sao->elem = 0;                                  \
 767 } while (0)
 768
 769 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 770 {
 771     HEVCLocalContext *lc    = s->HEVClc;
 772     int sao_merge_left_flag = 0;
 773     int sao_merge_up_flag   = 0;
 774     SAOParams *sao          = &CTB(s->sao, rx, ry);
 775     int c_idx, i;
 776
 777     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 778         s->sh.slice_sample_adaptive_offset_flag[1]) {
 779         if (rx > 0) {
 780             if (lc->ctb_left_flag)
 781                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 782         }
 783         if (ry > 0 && !sao_merge_left_flag) {
 784             if (lc->ctb_up_flag)
 785                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 786         }
 787     }
 788
 789     for (c_idx = 0; c_idx < 3; c_idx++) {
 790         int log2_sao_offset_scale = c_idx == 0 ? s->pps->log2_sao_offset_scale_luma :
 791                                                  s->pps->log2_sao_offset_scale_chroma;
 792
 793         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 794             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 795             continue;
 796         }
 797
 798         if (c_idx == 2) {
 799             sao->type_idx[2] = sao->type_idx[1];
 800             sao->eo_class[2] = sao->eo_class[1];
 801         } else {
 802             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 803         }
 804
 805         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 806             continue;
 807
 808         for (i = 0; i < 4; i++)
 809             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 810
 811         if (sao->type_idx[c_idx] == SAO_BAND) {
 812             for (i = 0; i < 4; i++) {
 813                 if (sao->offset_abs[c_idx][i]) {
 814                     SET_SAO(offset_sign[c_idx][i],
 815                             ff_hevc_sao_offset_sign_decode(s));
 816                 } else {
 817                     sao->offset_sign[c_idx][i] = 0;
 818                 }
 819             }
 820             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 821         } else if (c_idx != 2) {
 822             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 823         }
 824
 825         // Inferred parameters
 826         sao->offset_val[c_idx][0] = 0;
 827         for (i = 0; i < 4; i++) {
 828             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
 829             if (sao->type_idx[c_idx] == SAO_EDGE) {
 830                 if (i > 1)
 831                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 832             } else if (sao->offset_sign[c_idx][i]) {
 833                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 834             }
 835             sao->offset_val[c_idx][i + 1] <<= log2_sao_offset_scale;
 836         }
 837     }
 838 }
 839
 840 #undef SET_SAO
 841 #undef CTB
 842
 843 static int hls_cross_component_pred(HEVCContext *s, int idx) {
 844     HEVCLocalContext *lc    = s->HEVClc;
 845     int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
 846
 847     if (log2_res_scale_abs_plus1 !=  0) {
 848         int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
 849         lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
 850                                (1 - 2 * res_scale_sign_flag);
 851     } else {
 852         lc->tu.res_scale_val = 0;
 853     }
 854
 855
 856     return 0;
 857 }
 858
 859 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
 860                               int xBase, int yBase, int cb_xBase, int cb_yBase,
 861                               int log2_cb_size, int log2_trafo_size,
 862                               int trafo_depth, int blk_idx)
 863 {
 864     HEVCLocalContext *lc = s->HEVClc;
 865     const int log2_trafo_size_c = log2_trafo_size - s->sps->hshift[1];
 866     int i;
 867
 868     if (lc->cu.pred_mode == MODE_INTRA) {
 869         int trafo_size = 1 << log2_trafo_size;
 870         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
 871
 872         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
 873     }
 874
 875     if (lc->tt.cbf_luma ||
 876         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
 877         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) ||
 878         (s->sps->chroma_format_idc == 2 &&
 879          (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (1 << log2_trafo_size_c)) ||
 880          SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (1 << log2_trafo_size_c))))) {
 881         int scan_idx   = SCAN_DIAG;
 882         int scan_idx_c = SCAN_DIAG;
 883         int cbf_luma = lc->tt.cbf_luma;
 884         int cbf_chroma = SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
 885                          SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) ||
 886                          (s->sps->chroma_format_idc == 2 &&
 887                          (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (1 << log2_trafo_size_c)) ||
 888                          SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (1 << log2_trafo_size_c))));
 889
 890         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
 891             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
 892             if (lc->tu.cu_qp_delta != 0)
 893                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
 894                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
 895             lc->tu.is_cu_qp_delta_coded = 1;
 896
 897             if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
 898                 lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
 899                 av_log(s->avctx, AV_LOG_ERROR,
 900                        "The cu_qp_delta %d is outside the valid range "
 901                        "[%d, %d].\n",
 902                        lc->tu.cu_qp_delta,
 903                        -(26 + s->sps->qp_bd_offset / 2),
 904                         (25 + s->sps->qp_bd_offset / 2));
 905                 return AVERROR_INVALIDDATA;
 906             }
 907
 908             ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
 909         }
 910
 911         if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
 912             !lc->cu.cu_transquant_bypass_flag  &&  !lc->tu.is_cu_chroma_qp_offset_coded) {
 913             int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
 914             if (cu_chroma_qp_offset_flag) {
 915                 int cu_chroma_qp_offset_idx  = 0;
 916                 if (s->pps->chroma_qp_offset_list_len_minus1 > 0) {
 917                     cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
 918                     av_log(s->avctx, AV_LOG_ERROR,
 919                         "cu_chroma_qp_offset_idx not yet tested.\n");
 920                 }
 921                 lc->tu.cu_qp_offset_cb = s->pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
 922                 lc->tu.cu_qp_offset_cr = s->pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
 923             } else {
 924                 lc->tu.cu_qp_offset_cb = 0;
 925                 lc->tu.cu_qp_offset_cr = 0;
 926             }
 927             lc->tu.is_cu_chroma_qp_offset_coded = 1;
 928         }
 929
 930         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
 931             if (lc->tu.intra_pred_mode >= 6 &&
 932                 lc->tu.intra_pred_mode <= 14) {
 933                 scan_idx = SCAN_VERT;
 934             } else if (lc->tu.intra_pred_mode >= 22 &&
 935                        lc->tu.intra_pred_mode <= 30) {
 936                 scan_idx = SCAN_HORIZ;
 937             }
 938
 939             if (lc->tu.intra_pred_mode_c >=  6 &&
 940                 lc->tu.intra_pred_mode_c <= 14) {
 941                 scan_idx_c = SCAN_VERT;
 942             } else if (lc->tu.intra_pred_mode_c >= 22 &&
 943                        lc->tu.intra_pred_mode_c <= 30) {
 944                 scan_idx_c = SCAN_HORIZ;
 945             }
 946         }
 947
 948         lc->tu.cross_pf = 0;
 949
 950         if (cbf_luma)
 951             ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
 952         if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
 953             int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
 954             int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
 955             lc->tu.cross_pf  = (s->pps->cross_component_prediction_enabled_flag && cbf_luma &&
 956                                 (lc->cu.pred_mode == MODE_INTER ||
 957                                  (lc->tu.chroma_mode_c ==  4)));
 958
 959             if (lc->tu.cross_pf) {
 960                 hls_cross_component_pred(s, 0);
 961             }
 962             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
 963                 if (lc->cu.pred_mode == MODE_INTRA) {
 964                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
 965                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
 966                 }
 967                 if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (i << log2_trafo_size_c)))
 968                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
 969                                                 log2_trafo_size_c, scan_idx_c, 1);
 970                 else
 971                     if (lc->tu.cross_pf) {
 972                         ptrdiff_t stride = s->frame->linesize[1];
 973                         int hshift = s->sps->hshift[1];
 974                         int vshift = s->sps->vshift[1];
 975                         int16_t *coeffs_y = lc->tu.coeffs[0];
 976                         int16_t *coeffs =   lc->tu.coeffs[1];
 977                         int size = 1 << log2_trafo_size_c;
 978
 979                         uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
 980                                                               ((x0 >> hshift) << s->sps->pixel_shift)];
 981                         for (i = 0; i < (size * size); i++) {
 982                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
 983                         }
 984                         s->hevcdsp.transform_add[log2_trafo_size-2](dst, coeffs, stride);
 985                     }
 986             }
 987
 988             if (lc->tu.cross_pf) {
 989                 hls_cross_component_pred(s, 1);
 990             }
 991             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
 992                 if (lc->cu.pred_mode == MODE_INTRA) {
 993                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
 994                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
 995                 }
 996                 if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (i << log2_trafo_size_c)))
 997                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
 998                                                 log2_trafo_size_c, scan_idx_c, 2);
 999                 else
1000                     if (lc->tu.cross_pf) {
1001                         ptrdiff_t stride = s->frame->linesize[2];
1002                         int hshift = s->sps->hshift[2];
1003                         int vshift = s->sps->vshift[2];
1004                         int16_t *coeffs_y = lc->tu.coeffs[0];
1005                         int16_t *coeffs =   lc->tu.coeffs[1];
1006                         int size = 1 << log2_trafo_size_c;
1007
1008                         uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1009                                                           ((x0 >> hshift) << s->sps->pixel_shift)];
1010                         for (i = 0; i < (size * size); i++) {
1011                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1012                         }
1013                         s->hevcdsp.transform_add[log2_trafo_size-2](dst, coeffs, stride);
1014                     }
1015             }
1016         } else if (blk_idx == 3) {
1017             int trafo_size_h = 1 << (log2_trafo_size + 1);
1018             int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
1019             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1020                 if (lc->cu.pred_mode == MODE_INTRA) {
1021                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1022                                                     trafo_size_h, trafo_size_v);
1023                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1024                 }
1025                 if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], xBase, yBase + (i << log2_trafo_size_c)))
1026                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1027                                                 log2_trafo_size, scan_idx_c, 1);
1028             }
1029             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1030                 if (lc->cu.pred_mode == MODE_INTRA) {
1031                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1032                                                 trafo_size_h, trafo_size_v);
1033                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1034                 }
1035                 if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], xBase, yBase + (i << log2_trafo_size_c)))
1036                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1037                                                 log2_trafo_size, scan_idx_c, 2);
1038             }
1039         }
1040     } else if (lc->cu.pred_mode == MODE_INTRA) {
1041         if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
1042             int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
1043             int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
1044             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1045             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1046             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1047             if (s->sps->chroma_format_idc == 2) {
1048                 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1049                                                 trafo_size_h, trafo_size_v);
1050                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1051                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1052             }
1053         } else if (blk_idx == 3) {
1054             int trafo_size_h = 1 << (log2_trafo_size + 1);
1055             int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
1056             ff_hevc_set_neighbour_available(s, xBase, yBase,
1057                                             trafo_size_h, trafo_size_v);
1058             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1059             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1060             if (s->sps->chroma_format_idc == 2) {
1061                 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1062                                                 trafo_size_h, trafo_size_v);
1063                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1064                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
1065             }
1066         }
1067     }
1068
1069     return 0;
1070 }
1071
1072 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1073 {
1074     int cb_size          = 1 << log2_cb_size;
1075     int log2_min_pu_size = s->sps->log2_min_pu_size;
1076
1077     int min_pu_width     = s->sps->min_pu_width;
1078     int x_end = FFMIN(x0 + cb_size, s->sps->width);
1079     int y_end = FFMIN(y0 + cb_size, s->sps->height);
1080     int i, j;
1081
1082     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1083         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1084             s->is_pcm[i + j * min_pu_width] = 2;
1085 }
1086
1087 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1088                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1089                               int log2_cb_size, int log2_trafo_size,
1090                               int trafo_depth, int blk_idx)
1091 {
1092     HEVCLocalContext *lc = s->HEVClc;
1093     uint8_t split_transform_flag;
1094     int ret;
1095
1096     if (trafo_depth > 0 && log2_trafo_size == 2) {
1097         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1098             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase);
1099         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1100             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase);
1101         if (s->sps->chroma_format_idc == 2) {
1102             int xBase_cb = xBase & ((1 << log2_trafo_size) - 1);
1103             int yBase_cb = yBase & ((1 << log2_trafo_size) - 1);
1104             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1))) =
1105                 SAMPLE_CBF2(lc->tt.cbf_cb[trafo_depth - 1], xBase_cb, yBase_cb + (1 << (log2_trafo_size)));
1106             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1))) =
1107                 SAMPLE_CBF2(lc->tt.cbf_cr[trafo_depth - 1], xBase_cb, yBase_cb + (1 << (log2_trafo_size)));
1108         }
1109     } else {
1110         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1111         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) = 0;
1112         if (s->sps->chroma_format_idc == 2) {
1113             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1))) =
1114             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1))) = 0;
1115         }
1116     }
1117
1118     if (lc->cu.intra_split_flag) {
1119         if (trafo_depth == 1) {
1120             lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[blk_idx];
1121             if (s->sps->chroma_format_idc == 3) {
1122                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1123                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[blk_idx];
1124             } else {
1125                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1126                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1127             }
1128         }
1129     } else {
1130         lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[0];
1131         lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1132         lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1133     }
1134
1135     lc->tt.cbf_luma = 1;
1136
1137     lc->tt.inter_split_flag = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1138                               lc->cu.pred_mode == MODE_INTER &&
1139                               lc->cu.part_mode != PART_2Nx2N &&
1140                               trafo_depth == 0;
1141
1142     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1143         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1144         trafo_depth     < lc->cu.max_trafo_depth       &&
1145         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1146         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1147     } else {
1148         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1149                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1150                                lc->tt.inter_split_flag;
1151     }
1152
1153     if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
1154         if (trafo_depth == 0 ||
1155             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase)) {
1156             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1157                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1158             if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1159                 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 +  (1  <<  (log2_trafo_size - 1))) =
1160                     ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1161             }
1162         }
1163
1164         if (trafo_depth == 0 ||
1165             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase)) {
1166             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1167                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1168             if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1169                 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 +  (1  <<  (log2_trafo_size - 1))) =
1170                     ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1171             }
1172         }
1173     }
1174
1175     if (split_transform_flag) {
1176         int x1 = x0 + ((1 << log2_trafo_size) >> 1);
1177         int y1 = y0 + ((1 << log2_trafo_size) >> 1);
1178
1179         ret = hls_transform_tree(s, x0, y0, x0, y0, cb_xBase, cb_yBase,
1180                                  log2_cb_size, log2_trafo_size - 1,
1181                                  trafo_depth + 1, 0);
1182         if (ret < 0)
1183             return ret;
1184         ret = hls_transform_tree(s, x1, y0, x0, y0, cb_xBase, cb_yBase,
1185                                  log2_cb_size, log2_trafo_size - 1,
1186                                  trafo_depth + 1, 1);
1187         if (ret < 0)
1188             return ret;
1189         ret = hls_transform_tree(s, x0, y1, x0, y0, cb_xBase, cb_yBase,
1190                                  log2_cb_size, log2_trafo_size - 1,
1191                                  trafo_depth + 1, 2);
1192         if (ret < 0)
1193             return ret;
1194         ret = hls_transform_tree(s, x1, y1, x0, y0, cb_xBase, cb_yBase,
1195                                  log2_cb_size, log2_trafo_size - 1,
1196                                  trafo_depth + 1, 3);
1197         if (ret < 0)
1198             return ret;
1199     } else {
1200         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1201         int log2_min_tu_size = s->sps->log2_min_tb_size;
1202         int min_tu_width     = s->sps->min_tb_width;
1203
1204         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1205             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1206             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) ||
1207             (s->sps->chroma_format_idc == 2 &&
1208              (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 +  (1  <<  (log2_trafo_size - 1))) ||
1209               SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 +  (1  <<  (log2_trafo_size - 1)))))) {
1210             lc->tt.cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1211         }
1212
1213         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1214                                  log2_cb_size, log2_trafo_size, trafo_depth,
1215                                  blk_idx);
1216         if (ret < 0)
1217             return ret;
1218         // TODO: store cbf_luma somewhere else
1219         if (lc->tt.cbf_luma) {
1220             int i, j;
1221             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1222                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1223                     int x_tu = (x0 + j) >> log2_min_tu_size;
1224                     int y_tu = (y0 + i) >> log2_min_tu_size;
1225                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1226                 }
1227         }
1228         if (!s->sh.disable_deblocking_filter_flag) {
1229             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1230             if (s->pps->transquant_bypass_enable_flag &&
1231                 lc->cu.cu_transquant_bypass_flag)
1232                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1233         }
1234     }
1235     return 0;
1236 }
1237
1238 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1239 {
1240     //TODO: non-4:2:0 support
1241     HEVCLocalContext *lc = s->HEVClc;
1242     GetBitContext gb;
1243     int cb_size   = 1 << log2_cb_size;
1244     int stride0   = s->frame->linesize[0];
1245     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1246     int   stride1 = s->frame->linesize[1];
1247     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1248     int   stride2 = s->frame->linesize[2];
1249     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1250
1251     int length         = cb_size * cb_size * s->sps->pcm.bit_depth +
1252                          (((cb_size >> s->sps->hshift[1]) * (cb_size >> s->sps->vshift[1])) +
1253                           ((cb_size >> s->sps->hshift[2]) * (cb_size >> s->sps->vshift[2]))) *
1254                           s->sps->pcm.bit_depth_chroma;
1255     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1256     int ret;
1257
1258     if (!s->sh.disable_deblocking_filter_flag)
1259         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1260
1261     ret = init_get_bits(&gb, pcm, length);
1262     if (ret < 0)
1263         return ret;
1264
1265     s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size,     &gb, s->sps->pcm.bit_depth);
1266     s->hevcdsp.put_pcm(dst1, stride1,
1267                        cb_size >> s->sps->hshift[1],
1268                        cb_size >> s->sps->vshift[1],
1269                        &gb, s->sps->pcm.bit_depth_chroma);
1270     s->hevcdsp.put_pcm(dst2, stride2,
1271                        cb_size >> s->sps->hshift[2],
1272                        cb_size >> s->sps->vshift[2],
1273                        &gb, s->sps->pcm.bit_depth_chroma);
1274     return 0;
1275 }
1276
1277 /**
1278  * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1279  *
1280  * @param s HEVC decoding context
1281  * @param dst target buffer for block data at block position
1282  * @param dststride stride of the dst buffer
1283  * @param ref reference picture buffer at origin (0, 0)
1284  * @param mv motion vector (relative to block position) to get pixel data from
1285  * @param x_off horizontal position of block from origin (0, 0)
1286  * @param y_off vertical position of block from origin (0, 0)
1287  * @param block_w width of block
1288  * @param block_h height of block
1289  * @param luma_weight weighting factor applied to the luma prediction
1290  * @param luma_offset additive offset applied to the luma prediction value
1291  */
1292
1293 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1294                         AVFrame *ref, const Mv *mv, int x_off, int y_off,
1295                         int block_w, int block_h, int luma_weight, int luma_offset)
1296 {
1297     HEVCLocalContext *lc = s->HEVClc;
1298     uint8_t *src         = ref->data[0];
1299     ptrdiff_t srcstride  = ref->linesize[0];
1300     int pic_width        = s->sps->width;
1301     int pic_height       = s->sps->height;
1302     int mx               = mv->x & 3;
1303     int my               = mv->y & 3;
1304     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1305                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1306     int idx              = ff_hevc_pel_weight[block_w];
1307
1308     x_off += mv->x >> 2;
1309     y_off += mv->y >> 2;
1310     src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
1311
1312     if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1313         x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1314         y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1315         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1316         int offset     = QPEL_EXTRA_BEFORE * srcstride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1317         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1318
1319         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1320                                  edge_emu_stride, srcstride,
1321                                  block_w + QPEL_EXTRA,
1322                                  block_h + QPEL_EXTRA,
1323                                  x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1324                                  pic_width, pic_height);
1325         src = lc->edge_emu_buffer + buf_offset;
1326         srcstride = edge_emu_stride;
1327     }
1328
1329     if (!weight_flag)
1330         s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1331                                                       block_h, mx, my, block_w);
1332     else
1333         s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1334                                                         block_h, s->sh.luma_log2_weight_denom,
1335                                                         luma_weight, luma_offset, mx, my, block_w);
1336 }
1337
1338 /**
1339  * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1340  *
1341  * @param s HEVC decoding context
1342  * @param dst target buffer for block data at block position
1343  * @param dststride stride of the dst buffer
1344  * @param ref0 reference picture0 buffer at origin (0, 0)
1345  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1346  * @param x_off horizontal position of block from origin (0, 0)
1347  * @param y_off vertical position of block from origin (0, 0)
1348  * @param block_w width of block
1349  * @param block_h height of block
1350  * @param ref1 reference picture1 buffer at origin (0, 0)
1351  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1352  * @param current_mv current motion vector structure
1353  */
1354  static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1355                        AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1356                        int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1357 {
1358     HEVCLocalContext *lc = s->HEVClc;
1359     DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1360     ptrdiff_t src0stride  = ref0->linesize[0];
1361     ptrdiff_t src1stride  = ref1->linesize[0];
1362     int pic_width        = s->sps->width;
1363     int pic_height       = s->sps->height;
1364     int mx0              = mv0->x & 3;
1365     int my0              = mv0->y & 3;
1366     int mx1              = mv1->x & 3;
1367     int my1              = mv1->y & 3;
1368     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1369                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1370     int x_off0           = x_off + (mv0->x >> 2);
1371     int y_off0           = y_off + (mv0->y >> 2);
1372     int x_off1           = x_off + (mv1->x >> 2);
1373     int y_off1           = y_off + (mv1->y >> 2);
1374     int idx              = ff_hevc_pel_weight[block_w];
1375
1376     uint8_t *src0  = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
1377     uint8_t *src1  = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
1378
1379     if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1380         x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1381         y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1382         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1383         int offset     = QPEL_EXTRA_BEFORE * src0stride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1384         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1385
1386         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1387                                  edge_emu_stride, src0stride,
1388                                  block_w + QPEL_EXTRA,
1389                                  block_h + QPEL_EXTRA,
1390                                  x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1391                                  pic_width, pic_height);
1392         src0 = lc->edge_emu_buffer + buf_offset;
1393         src0stride = edge_emu_stride;
1394     }
1395
1396     if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1397         x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1398         y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1399         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1400         int offset     = QPEL_EXTRA_BEFORE * src1stride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1401         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1402
1403         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1404                                  edge_emu_stride, src1stride,
1405                                  block_w + QPEL_EXTRA,
1406                                  block_h + QPEL_EXTRA,
1407                                  x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1408                                  pic_width, pic_height);
1409         src1 = lc->edge_emu_buffer2 + buf_offset;
1410         src1stride = edge_emu_stride;
1411     }
1412
1413     s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](tmp, MAX_PB_SIZE, src0, src0stride,
1414                                                 block_h, mx0, my0, block_w);
1415     if (!weight_flag)
1416         s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, tmp, MAX_PB_SIZE,
1417                                                        block_h, mx1, my1, block_w);
1418     else
1419         s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, tmp, MAX_PB_SIZE,
1420                                                          block_h, s->sh.luma_log2_weight_denom,
1421                                                          s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1422                                                          s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1423                                                          s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1424                                                          s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1425                                                          mx1, my1, block_w);
1426
1427 }
1428
1429 /**
1430  * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1431  *
1432  * @param s HEVC decoding context
1433  * @param dst1 target buffer for block data at block position (U plane)
1434  * @param dst2 target buffer for block data at block position (V plane)
1435  * @param dststride stride of the dst1 and dst2 buffers
1436  * @param ref reference picture buffer at origin (0, 0)
1437  * @param mv motion vector (relative to block position) to get pixel data from
1438  * @param x_off horizontal position of block from origin (0, 0)
1439  * @param y_off vertical position of block from origin (0, 0)
1440  * @param block_w width of block
1441  * @param block_h height of block
1442  * @param chroma_weight weighting factor applied to the chroma prediction
1443  * @param chroma_offset additive offset applied to the chroma prediction value
1444  */
1445
1446 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1447                           ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1448                           int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1449 {
1450     HEVCLocalContext *lc = s->HEVClc;
1451     int pic_width        = s->sps->width >> s->sps->hshift[1];
1452     int pic_height       = s->sps->height >> s->sps->vshift[1];
1453     const Mv *mv         = &current_mv->mv[reflist];
1454     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1455                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1456     int idx              = ff_hevc_pel_weight[block_w];
1457     int hshift           = s->sps->hshift[1];
1458     int vshift           = s->sps->vshift[1];
1459     intptr_t mx          = mv->x & ((1 << (2 + hshift)) - 1);
1460     intptr_t my          = mv->y & ((1 << (2 + vshift)) - 1);
1461     intptr_t _mx         = mx << (1 - hshift);
1462     intptr_t _my         = my << (1 - vshift);
1463
1464     x_off += mv->x >> (2 + hshift);
1465     y_off += mv->y >> (2 + vshift);
1466     src0  += y_off * srcstride + (x_off << s->sps->pixel_shift);
1467
1468     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1469         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1470         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1471         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1472         int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->sps->pixel_shift));
1473         int buf_offset0 = EPEL_EXTRA_BEFORE *
1474                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1475         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1476                                  edge_emu_stride, srcstride,
1477                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1478                                  x_off - EPEL_EXTRA_BEFORE,
1479                                  y_off - EPEL_EXTRA_BEFORE,
1480                                  pic_width, pic_height);
1481
1482         src0 = lc->edge_emu_buffer + buf_offset0;
1483         srcstride = edge_emu_stride;
1484     }
1485     if (!weight_flag)
1486         s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1487                                                   block_h, _mx, _my, block_w);
1488     else
1489         s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1490                                                         block_h, s->sh.chroma_log2_weight_denom,
1491                                                         chroma_weight, chroma_offset, _mx, _my, block_w);
1492 }
1493
1494 /**
1495  * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1496  *
1497  * @param s HEVC decoding context
1498  * @param dst target buffer for block data at block position
1499  * @param dststride stride of the dst buffer
1500  * @param ref0 reference picture0 buffer at origin (0, 0)
1501  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1502  * @param x_off horizontal position of block from origin (0, 0)
1503  * @param y_off vertical position of block from origin (0, 0)
1504  * @param block_w width of block
1505  * @param block_h height of block
1506  * @param ref1 reference picture1 buffer at origin (0, 0)
1507  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1508  * @param current_mv current motion vector structure
1509  * @param cidx chroma component(cb, cr)
1510  */
1511 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1512                          int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1513 {
1514     DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1515     int tmpstride = MAX_PB_SIZE;
1516     HEVCLocalContext *lc = s->HEVClc;
1517     uint8_t *src1        = ref0->data[cidx+1];
1518     uint8_t *src2        = ref1->data[cidx+1];
1519     ptrdiff_t src1stride = ref0->linesize[cidx+1];
1520     ptrdiff_t src2stride = ref1->linesize[cidx+1];
1521     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1522                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1523     int pic_width        = s->sps->width >> s->sps->hshift[1];
1524     int pic_height       = s->sps->height >> s->sps->vshift[1];
1525     Mv *mv0              = &current_mv->mv[0];
1526     Mv *mv1              = &current_mv->mv[1];
1527     int hshift = s->sps->hshift[1];
1528     int vshift = s->sps->vshift[1];
1529
1530     intptr_t mx0 = mv0->x & ((1 << (2 + hshift)) - 1);
1531     intptr_t my0 = mv0->y & ((1 << (2 + vshift)) - 1);
1532     intptr_t mx1 = mv1->x & ((1 << (2 + hshift)) - 1);
1533     intptr_t my1 = mv1->y & ((1 << (2 + vshift)) - 1);
1534     intptr_t _mx0 = mx0 << (1 - hshift);
1535     intptr_t _my0 = my0 << (1 - vshift);
1536     intptr_t _mx1 = mx1 << (1 - hshift);
1537     intptr_t _my1 = my1 << (1 - vshift);
1538
1539     int x_off0 = x_off + (mv0->x >> (2 + hshift));
1540     int y_off0 = y_off + (mv0->y >> (2 + vshift));
1541     int x_off1 = x_off + (mv1->x >> (2 + hshift));
1542     int y_off1 = y_off + (mv1->y >> (2 + vshift));
1543     int idx = ff_hevc_pel_weight[block_w];
1544     src1  += y_off0 * src1stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
1545     src2  += y_off1 * src2stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
1546
1547     if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1548         x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1549         y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1550         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1551         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1552         int buf_offset1 = EPEL_EXTRA_BEFORE *
1553                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1554
1555         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1556                                  edge_emu_stride, src1stride,
1557                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1558                                  x_off0 - EPEL_EXTRA_BEFORE,
1559                                  y_off0 - EPEL_EXTRA_BEFORE,
1560                                  pic_width, pic_height);
1561
1562         src1 = lc->edge_emu_buffer + buf_offset1;
1563         src1stride = edge_emu_stride;
1564     }
1565
1566     if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1567         x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1568         y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1569         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1570         int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1571         int buf_offset1 = EPEL_EXTRA_BEFORE *
1572                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1573
1574         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1575                                  edge_emu_stride, src2stride,
1576                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1577                                  x_off1 - EPEL_EXTRA_BEFORE,
1578                                  y_off1 - EPEL_EXTRA_BEFORE,
1579                                  pic_width, pic_height);
1580
1581         src2 = lc->edge_emu_buffer2 + buf_offset1;
1582         src2stride = edge_emu_stride;
1583     }
1584
1585     s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](tmp, tmpstride, src1, src1stride,
1586                                                 block_h, _mx0, _my0, block_w);
1587     if (!weight_flag)
1588         s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1589                                                        src2, src2stride, tmp, tmpstride,
1590                                                        block_h, _mx1, _my1, block_w);
1591     else
1592         s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1593                                                          src2, src2stride, tmp, tmpstride,
1594                                                          block_h,
1595                                                          s->sh.chroma_log2_weight_denom,
1596                                                          s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1597                                                          s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1598                                                          s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1599                                                          s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1600                                                          _mx1, _my1, block_w);
1601 }
1602
1603 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1604                                 const Mv *mv, int y0, int height)
1605 {
1606     int y = (mv->y >> 2) + y0 + height + 9;
1607
1608     if (s->threads_type == FF_THREAD_FRAME )
1609         ff_thread_await_progress(&ref->tf, y, 0);
1610 }
1611
1612 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1613                                 int nPbW, int nPbH,
1614                                 int log2_cb_size, int partIdx, int idx)
1615 {
1616 #define POS(c_idx, x, y)                                                              \
1617     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1618                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1619     HEVCLocalContext *lc = s->HEVClc;
1620     int merge_idx = 0;
1621     struct MvField current_mv = {{{ 0 }}};
1622
1623     int min_pu_width = s->sps->min_pu_width;
1624
1625     MvField *tab_mvf = s->ref->tab_mvf;
1626     RefPicList  *refPicList = s->ref->refPicList;
1627     HEVCFrame *ref0, *ref1;
1628     uint8_t *dst0 = POS(0, x0, y0);
1629     uint8_t *dst1 = POS(1, x0, y0);
1630     uint8_t *dst2 = POS(2, x0, y0);
1631     int log2_min_cb_size = s->sps->log2_min_cb_size;
1632     int min_cb_width     = s->sps->min_cb_width;
1633     int x_cb             = x0 >> log2_min_cb_size;
1634     int y_cb             = y0 >> log2_min_cb_size;
1635     int ref_idx[2];
1636     int mvp_flag[2];
1637     int x_pu, y_pu;
1638     int i, j;
1639
1640     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1641         if (s->sh.max_num_merge_cand > 1)
1642             merge_idx = ff_hevc_merge_idx_decode(s);
1643         else
1644             merge_idx = 0;
1645
1646         ff_hevc_luma_mv_merge_mode(s, x0, y0,
1647                                    1 << log2_cb_size,
1648                                    1 << log2_cb_size,
1649                                    log2_cb_size, partIdx,
1650                                    merge_idx, &current_mv);
1651         x_pu = x0 >> s->sps->log2_min_pu_size;
1652         y_pu = y0 >> s->sps->log2_min_pu_size;
1653
1654         for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1655             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1656                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1657     } else { /* MODE_INTER */
1658         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1659         if (lc->pu.merge_flag) {
1660             if (s->sh.max_num_merge_cand > 1)
1661                 merge_idx = ff_hevc_merge_idx_decode(s);
1662             else
1663                 merge_idx = 0;
1664
1665             ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1666                                        partIdx, merge_idx, &current_mv);
1667             x_pu = x0 >> s->sps->log2_min_pu_size;
1668             y_pu = y0 >> s->sps->log2_min_pu_size;
1669
1670             for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1671                 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1672                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1673         } else {
1674             enum InterPredIdc inter_pred_idc = PRED_L0;
1675             ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1676             current_mv.pred_flag = 0;
1677             if (s->sh.slice_type == B_SLICE)
1678                 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1679
1680             if (inter_pred_idc != PRED_L1) {
1681                 if (s->sh.nb_refs[L0]) {
1682                     ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1683                     current_mv.ref_idx[0] = ref_idx[0];
1684                 }
1685                 current_mv.pred_flag = PF_L0;
1686                 ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1687                 mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
1688                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1689                                          partIdx, merge_idx, &current_mv,
1690                                          mvp_flag[0], 0);
1691                 current_mv.mv[0].x += lc->pu.mvd.x;
1692                 current_mv.mv[0].y += lc->pu.mvd.y;
1693             }
1694
1695             if (inter_pred_idc != PRED_L0) {
1696                 if (s->sh.nb_refs[L1]) {
1697                     ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1698                     current_mv.ref_idx[1] = ref_idx[1];
1699                 }
1700
1701                 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1702                     AV_ZERO32(&lc->pu.mvd);
1703                 } else {
1704                     ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1705                 }
1706
1707                 current_mv.pred_flag += PF_L1;
1708                 mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
1709                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1710                                          partIdx, merge_idx, &current_mv,
1711                                          mvp_flag[1], 1);
1712                 current_mv.mv[1].x += lc->pu.mvd.x;
1713                 current_mv.mv[1].y += lc->pu.mvd.y;
1714             }
1715
1716             x_pu = x0 >> s->sps->log2_min_pu_size;
1717             y_pu = y0 >> s->sps->log2_min_pu_size;
1718
1719             for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1720                 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1721                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1722         }
1723     }
1724
1725     if (current_mv.pred_flag & PF_L0) {
1726         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1727         if (!ref0)
1728             return;
1729         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1730     }
1731     if (current_mv.pred_flag & PF_L1) {
1732         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1733         if (!ref1)
1734             return;
1735         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1736     }
1737
1738     if (current_mv.pred_flag == PF_L0) {
1739         int x0_c = x0 >> s->sps->hshift[1];
1740         int y0_c = y0 >> s->sps->vshift[1];
1741         int nPbW_c = nPbW >> s->sps->hshift[1];
1742         int nPbH_c = nPbH >> s->sps->vshift[1];
1743
1744         luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1745                     &current_mv.mv[0], x0, y0, nPbW, nPbH,
1746                     s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1747                     s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1748
1749         chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1750                       0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1751                       s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1752         chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1753                       0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1754                       s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1755     } else if (current_mv.pred_flag == PF_L1) {
1756         int x0_c = x0 >> s->sps->hshift[1];
1757         int y0_c = y0 >> s->sps->vshift[1];
1758         int nPbW_c = nPbW >> s->sps->hshift[1];
1759         int nPbH_c = nPbH >> s->sps->vshift[1];
1760
1761         luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1762                     &current_mv.mv[1], x0, y0, nPbW, nPbH,
1763                     s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1764                     s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1765
1766         chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1767                       1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1768                       s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1769
1770         chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1771                       1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1772                       s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1773     } else if (current_mv.pred_flag == PF_BI) {
1774         int x0_c = x0 >> s->sps->hshift[1];
1775         int y0_c = y0 >> s->sps->vshift[1];
1776         int nPbW_c = nPbW >> s->sps->hshift[1];
1777         int nPbH_c = nPbH >> s->sps->vshift[1];
1778
1779         luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1780                    &current_mv.mv[0], x0, y0, nPbW, nPbH,
1781                    ref1->frame, &current_mv.mv[1], &current_mv);
1782
1783         chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1784                      x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0);
1785
1786         chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1787                      x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 1);
1788     }
1789 }
1790
1791 /**
1792  * 8.4.1
1793  */
1794 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1795                                 int prev_intra_luma_pred_flag)
1796 {
1797     HEVCLocalContext *lc = s->HEVClc;
1798     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1799     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1800     int min_pu_width     = s->sps->min_pu_width;
1801     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1802     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1803     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1804
1805     int cand_up   = (lc->ctb_up_flag || y0b) ?
1806                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1807     int cand_left = (lc->ctb_left_flag || x0b) ?
1808                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1809
1810     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1811
1812     MvField *tab_mvf = s->ref->tab_mvf;
1813     int intra_pred_mode;
1814     int candidate[3];
1815     int i, j;
1816
1817     // intra_pred_mode prediction does not cross vertical CTB boundaries
1818     if ((y0 - 1) < y_ctb)
1819         cand_up = INTRA_DC;
1820
1821     if (cand_left == cand_up) {
1822         if (cand_left < 2) {
1823             candidate[0] = INTRA_PLANAR;
1824             candidate[1] = INTRA_DC;
1825             candidate[2] = INTRA_ANGULAR_26;
1826         } else {
1827             candidate[0] = cand_left;
1828             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1829             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1830         }
1831     } else {
1832         candidate[0] = cand_left;
1833         candidate[1] = cand_up;
1834         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1835             candidate[2] = INTRA_PLANAR;
1836         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1837             candidate[2] = INTRA_DC;
1838         } else {
1839             candidate[2] = INTRA_ANGULAR_26;
1840         }
1841     }
1842
1843     if (prev_intra_luma_pred_flag) {
1844         intra_pred_mode = candidate[lc->pu.mpm_idx];
1845     } else {
1846         if (candidate[0] > candidate[1])
1847             FFSWAP(uint8_t, candidate[0], candidate[1]);
1848         if (candidate[0] > candidate[2])
1849             FFSWAP(uint8_t, candidate[0], candidate[2]);
1850         if (candidate[1] > candidate[2])
1851             FFSWAP(uint8_t, candidate[1], candidate[2]);
1852
1853         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1854         for (i = 0; i < 3; i++)
1855             if (intra_pred_mode >= candidate[i])
1856                 intra_pred_mode++;
1857     }
1858
1859     /* write the intra prediction units into the mv array */
1860     if (!size_in_pus)
1861         size_in_pus = 1;
1862     for (i = 0; i < size_in_pus; i++) {
1863         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1864                intra_pred_mode, size_in_pus);
1865
1866         for (j = 0; j < size_in_pus; j++) {
1867             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
1868         }
1869     }
1870
1871     return intra_pred_mode;
1872 }
1873
1874 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1875                                           int log2_cb_size, int ct_depth)
1876 {
1877     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1878     int x_cb   = x0 >> s->sps->log2_min_cb_size;
1879     int y_cb   = y0 >> s->sps->log2_min_cb_size;
1880     int y;
1881
1882     for (y = 0; y < length; y++)
1883         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
1884                ct_depth, length);
1885 }
1886
1887 static const uint8_t tab_mode_idx[] = {
1888      0,  1,  2,  2,  2,  2,  3,  5,  7,  8, 10, 12, 13, 15, 17, 18, 19, 20,
1889     21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31};
1890
1891 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1892                                   int log2_cb_size)
1893 {
1894     HEVCLocalContext *lc = s->HEVClc;
1895     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1896     uint8_t prev_intra_luma_pred_flag[4];
1897     int split   = lc->cu.part_mode == PART_NxN;
1898     int pb_size = (1 << log2_cb_size) >> split;
1899     int side    = split + 1;
1900     int chroma_mode;
1901     int i, j;
1902
1903     for (i = 0; i < side; i++)
1904         for (j = 0; j < side; j++)
1905             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1906
1907     for (i = 0; i < side; i++) {
1908         for (j = 0; j < side; j++) {
1909             if (prev_intra_luma_pred_flag[2 * i + j])
1910                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1911             else
1912                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1913
1914             lc->pu.intra_pred_mode[2 * i + j] =
1915                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1916                                      prev_intra_luma_pred_flag[2 * i + j]);
1917         }
1918     }
1919
1920     if (s->sps->chroma_format_idc == 3) {
1921         for (i = 0; i < side; i++) {
1922             for (j = 0; j < side; j++) {
1923                 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1924                 if (chroma_mode != 4) {
1925                     if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
1926                         lc->pu.intra_pred_mode_c[2 * i + j] = 34;
1927                     else
1928                         lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
1929                 } else {
1930                     lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
1931                 }
1932             }
1933         }
1934     } else if (s->sps->chroma_format_idc == 2) {
1935         int mode_idx;
1936         lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1937         if (chroma_mode != 4) {
1938             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1939                 mode_idx = 34;
1940             else
1941                 mode_idx = intra_chroma_table[chroma_mode];
1942         } else {
1943             mode_idx = lc->pu.intra_pred_mode[0];
1944         }
1945         lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
1946     } else if (s->sps->chroma_format_idc != 0) {
1947         chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1948         if (chroma_mode != 4) {
1949             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1950                 lc->pu.intra_pred_mode_c[0] = 34;
1951             else
1952                 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
1953         } else {
1954             lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
1955         }
1956     }
1957 }
1958
1959 static void intra_prediction_unit_default_value(HEVCContext *s,
1960                                                 int x0, int y0,
1961                                                 int log2_cb_size)
1962 {
1963     HEVCLocalContext *lc = s->HEVClc;
1964     int pb_size          = 1 << log2_cb_size;
1965     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
1966     int min_pu_width     = s->sps->min_pu_width;
1967     MvField *tab_mvf     = s->ref->tab_mvf;
1968     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1969     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1970     int j, k;
1971
1972     if (size_in_pus == 0)
1973         size_in_pus = 1;
1974     for (j = 0; j < size_in_pus; j++)
1975         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
1976     if (lc->cu.pred_mode == MODE_INTRA)
1977         for (j = 0; j < size_in_pus; j++)
1978             for (k = 0; k < size_in_pus; k++)
1979                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
1980 }
1981
1982 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
1983 {
1984     int cb_size          = 1 << log2_cb_size;
1985     HEVCLocalContext *lc = s->HEVClc;
1986     int log2_min_cb_size = s->sps->log2_min_cb_size;
1987     int length           = cb_size >> log2_min_cb_size;
1988     int min_cb_width     = s->sps->min_cb_width;
1989     int x_cb             = x0 >> log2_min_cb_size;
1990     int y_cb             = y0 >> log2_min_cb_size;
1991     int idx              = log2_cb_size - 2;
1992     int qp_block_mask    = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
1993     int x, y, ret;
1994
1995     lc->cu.x                = x0;
1996     lc->cu.y                = y0;
1997     lc->cu.rqt_root_cbf     = 1;
1998     lc->cu.pred_mode        = MODE_INTRA;
1999     lc->cu.part_mode        = PART_2Nx2N;
2000     lc->cu.intra_split_flag = 0;
2001     lc->cu.pcm_flag         = 0;
2002
2003     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2004     for (x = 0; x < 4; x++)
2005         lc->pu.intra_pred_mode[x] = 1;
2006     if (s->pps->transquant_bypass_enable_flag) {
2007         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2008         if (lc->cu.cu_transquant_bypass_flag)
2009             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2010     } else
2011         lc->cu.cu_transquant_bypass_flag = 0;
2012
2013     if (s->sh.slice_type != I_SLICE) {
2014         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2015
2016         x = y_cb * min_cb_width + x_cb;
2017         for (y = 0; y < length; y++) {
2018             memset(&s->skip_flag[x], skip_flag, length);
2019             x += min_cb_width;
2020         }
2021         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2022     }
2023
2024     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2025         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2026         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2027
2028         if (!s->sh.disable_deblocking_filter_flag)
2029             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2030     } else {
2031         if (s->sh.slice_type != I_SLICE)
2032             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2033         if (lc->cu.pred_mode != MODE_INTRA ||
2034             log2_cb_size == s->sps->log2_min_cb_size) {
2035             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2036             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2037                                       lc->cu.pred_mode == MODE_INTRA;
2038         }
2039
2040         if (lc->cu.pred_mode == MODE_INTRA) {
2041             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2042                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2043                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2044                 lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s);
2045             }
2046             if (lc->cu.pcm_flag) {
2047                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2048                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2049                 if (s->sps->pcm.loop_filter_disable_flag)
2050                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2051
2052                 if (ret < 0)
2053                     return ret;
2054             } else {
2055                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2056             }
2057         } else {
2058             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2059             switch (lc->cu.part_mode) {
2060             case PART_2Nx2N:
2061                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2062                 break;
2063             case PART_2NxN:
2064                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0, idx);
2065                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2066                 break;
2067             case PART_Nx2N:
2068                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2069                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2070                 break;
2071             case PART_2NxnU:
2072                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0, idx);
2073                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2074                 break;
2075             case PART_2NxnD:
2076                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2077                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1, idx);
2078                 break;
2079             case PART_nLx2N:
2080                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0, idx - 2);
2081                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2082                 break;
2083             case PART_nRx2N:
2084                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2085                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1, idx - 2);
2086                 break;
2087             case PART_NxN:
2088                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2089                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2090                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2091                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
2092                 break;
2093             }
2094         }
2095
2096         if (!lc->cu.pcm_flag) {
2097             if (lc->cu.pred_mode != MODE_INTRA &&
2098                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2099                 lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2100             }
2101             if (lc->cu.rqt_root_cbf) {
2102                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2103                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2104                                          s->sps->max_transform_hierarchy_depth_inter;
2105                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2106                                          log2_cb_size,
2107                                          log2_cb_size, 0, 0);
2108                 if (ret < 0)
2109                     return ret;
2110             } else {
2111                 if (!s->sh.disable_deblocking_filter_flag)
2112                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2113             }
2114         }
2115     }
2116
2117     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2118         ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
2119
2120     x = y_cb * min_cb_width + x_cb;
2121     for (y = 0; y < length; y++) {
2122         memset(&s->qp_y_tab[x], lc->qp_y, length);
2123         x += min_cb_width;
2124     }
2125
2126     if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2127        ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2128         lc->qPy_pred = lc->qp_y;
2129     }
2130
2131     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2132
2133     return 0;
2134 }
2135
2136 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2137                                int log2_cb_size, int cb_depth)
2138 {
2139     HEVCLocalContext *lc = s->HEVClc;
2140     const int cb_size    = 1 << log2_cb_size;
2141     int ret;
2142     int qp_block_mask = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
2143     int split_cu_flag;
2144
2145     lc->ct.depth = cb_depth;
2146     if (x0 + cb_size <= s->sps->width  &&
2147         y0 + cb_size <= s->sps->height &&
2148         log2_cb_size > s->sps->log2_min_cb_size) {
2149         split_cu_flag = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2150     } else {
2151         split_cu_flag = (log2_cb_size > s->sps->log2_min_cb_size);
2152     }
2153     if (s->pps->cu_qp_delta_enabled_flag &&
2154         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2155         lc->tu.is_cu_qp_delta_coded = 0;
2156         lc->tu.cu_qp_delta          = 0;
2157     }
2158
2159     if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2160         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_chroma_qp_offset_depth) {
2161         lc->tu.is_cu_chroma_qp_offset_coded = 0;
2162     }
2163
2164     if (split_cu_flag) {
2165         const int cb_size_split = cb_size >> 1;
2166         const int x1 = x0 + cb_size_split;
2167         const int y1 = y0 + cb_size_split;
2168
2169         int more_data = 0;
2170
2171         more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2172         if (more_data < 0)
2173             return more_data;
2174
2175         if (more_data && x1 < s->sps->width) {
2176             more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2177             if (more_data < 0)
2178                 return more_data;
2179         }
2180         if (more_data && y1 < s->sps->height) {
2181             more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2182             if (more_data < 0)
2183                 return more_data;
2184         }
2185         if (more_data && x1 < s->sps->width &&
2186             y1 < s->sps->height) {
2187             more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
2188             if (more_data < 0)
2189                 return more_data;
2190         }
2191
2192         if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2193             ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2194             lc->qPy_pred = lc->qp_y;
2195
2196         if (more_data)
2197             return ((x1 + cb_size_split) < s->sps->width ||
2198                     (y1 + cb_size_split) < s->sps->height);
2199         else
2200             return 0;
2201     } else {
2202         ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2203         if (ret < 0)
2204             return ret;
2205         if ((!((x0 + cb_size) %
2206                (1 << (s->sps->log2_ctb_size))) ||
2207              (x0 + cb_size >= s->sps->width)) &&
2208             (!((y0 + cb_size) %
2209                (1 << (s->sps->log2_ctb_size))) ||
2210              (y0 + cb_size >= s->sps->height))) {
2211             int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2212             return !end_of_slice_flag;
2213         } else {
2214             return 1;
2215         }
2216     }
2217
2218     return 0;
2219 }
2220
2221 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2222                                  int ctb_addr_ts)
2223 {
2224     HEVCLocalContext *lc  = s->HEVClc;
2225     int ctb_size          = 1 << s->sps->log2_ctb_size;
2226     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2227     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2228
2229     int tile_left_boundary, tile_up_boundary;
2230     int slice_left_boundary, slice_up_boundary;
2231
2232     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2233
2234     if (s->pps->entropy_coding_sync_enabled_flag) {
2235         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2236             lc->first_qp_group = 1;
2237         lc->end_of_tiles_x = s->sps->width;
2238     } else if (s->pps->tiles_enabled_flag) {
2239         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2240             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2241             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2242             lc->first_qp_group   = 1;
2243         }
2244     } else {
2245         lc->end_of_tiles_x = s->sps->width;
2246     }
2247
2248     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2249
2250     if (s->pps->tiles_enabled_flag) {
2251         tile_left_boundary = x_ctb > 0 &&
2252                              s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1]];
2253         slice_left_boundary = x_ctb > 0 &&
2254                               s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1];
2255         tile_up_boundary  = y_ctb > 0 &&
2256                             s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
2257         slice_up_boundary = y_ctb > 0 &&
2258                             s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width];
2259     } else {
2260         tile_left_boundary =
2261         tile_up_boundary   = 0;
2262         slice_left_boundary = ctb_addr_in_slice <= 0;
2263         slice_up_boundary   = ctb_addr_in_slice < s->sps->ctb_width;
2264     }
2265     lc->slice_or_tiles_left_boundary = slice_left_boundary + (tile_left_boundary << 1);
2266     lc->slice_or_tiles_up_boundary   = slice_up_boundary   + (tile_up_boundary   << 1);
2267     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0)                  && !tile_left_boundary);
2268     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && !tile_up_boundary);
2269     lc->ctb_up_right_flag = ((y_ctb > 0)                 && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2270     lc->ctb_up_left_flag  = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2271 }
2272
2273 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2274 {
2275     HEVCContext *s  = avctxt->priv_data;
2276     int ctb_size    = 1 << s->sps->log2_ctb_size;
2277     int more_data   = 1;
2278     int x_ctb       = 0;
2279     int y_ctb       = 0;
2280     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2281
2282     if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2283         av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2284         return AVERROR_INVALIDDATA;
2285     }
2286
2287     if (s->sh.dependent_slice_segment_flag) {
2288         int prev_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2289         if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2290             av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2291             return AVERROR_INVALIDDATA;
2292         }
2293     }
2294
2295     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2296         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2297
2298         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2299         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2300         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2301
2302         ff_hevc_cabac_init(s, ctb_addr_ts);
2303
2304         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2305
2306         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2307         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2308         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2309
2310         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2311         if (more_data < 0) {
2312             s->tab_slice_address[ctb_addr_rs] = -1;
2313             return more_data;
2314         }
2315
2316
2317         ctb_addr_ts++;
2318         ff_hevc_save_states(s, ctb_addr_ts);
2319         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2320     }
2321
2322     if (x_ctb + ctb_size >= s->sps->width &&
2323         y_ctb + ctb_size >= s->sps->height)
2324         ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2325
2326     return ctb_addr_ts;
2327 }
2328
2329 static int hls_slice_data(HEVCContext *s)
2330 {
2331     int arg[2];
2332     int ret[2];
2333
2334     arg[0] = 0;
2335     arg[1] = 1;
2336
2337     s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
2338     return ret[0];
2339 }
2340 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2341 {
2342     HEVCContext *s1  = avctxt->priv_data, *s;
2343     HEVCLocalContext *lc;
2344     int ctb_size    = 1<< s1->sps->log2_ctb_size;
2345     int more_data   = 1;
2346     int *ctb_row_p    = input_ctb_row;
2347     int ctb_row = ctb_row_p[job];
2348     int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->sps->width + ctb_size - 1) >> s1->sps->log2_ctb_size);
2349     int ctb_addr_ts = s1->pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2350     int thread = ctb_row % s1->threads_number;
2351     int ret;
2352
2353     s = s1->sList[self_id];
2354     lc = s->HEVClc;
2355
2356     if(ctb_row) {
2357         ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2358
2359         if (ret < 0)
2360             return ret;
2361         ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2362     }
2363
2364     while(more_data && ctb_addr_ts < s->sps->ctb_size) {
2365         int x_ctb = (ctb_addr_rs % s->sps->ctb_width) << s->sps->log2_ctb_size;
2366         int y_ctb = (ctb_addr_rs / s->sps->ctb_width) << s->sps->log2_ctb_size;
2367
2368         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2369
2370         ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
2371
2372         if (avpriv_atomic_int_get(&s1->wpp_err)){
2373             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2374             return 0;
2375         }
2376
2377         ff_hevc_cabac_init(s, ctb_addr_ts);
2378         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2379         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2380
2381         if (more_data < 0) {
2382             s->tab_slice_address[ctb_addr_rs] = -1;
2383             return more_data;
2384         }
2385
2386         ctb_addr_ts++;
2387
2388         ff_hevc_save_states(s, ctb_addr_ts);
2389         ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2390         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2391
2392         if (!more_data && (x_ctb+ctb_size) < s->sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2393             avpriv_atomic_int_set(&s1->wpp_err,  1);
2394             ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2395             return 0;
2396         }
2397
2398         if ((x_ctb+ctb_size) >= s->sps->width && (y_ctb+ctb_size) >= s->sps->height ) {
2399             ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2400             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2401             return ctb_addr_ts;
2402         }
2403         ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2404         x_ctb+=ctb_size;
2405
2406         if(x_ctb >= s->sps->width) {
2407             break;
2408         }
2409     }
2410     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2411
2412     return 0;
2413 }
2414
2415 static int hls_slice_data_wpp(HEVCContext *s, const uint8_t *nal, int length)
2416 {
2417     HEVCLocalContext *lc = s->HEVClc;
2418     int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2419     int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2420     int offset;
2421     int startheader, cmpt = 0;
2422     int i, j, res = 0;
2423
2424
2425     if (!s->sList[1]) {
2426         ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
2427
2428
2429         for (i = 1; i < s->threads_number; i++) {
2430             s->sList[i] = av_malloc(sizeof(HEVCContext));
2431             memcpy(s->sList[i], s, sizeof(HEVCContext));
2432             s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2433             s->sList[i]->HEVClc = s->HEVClcList[i];
2434         }
2435     }
2436
2437     offset = (lc->gb.index >> 3);
2438
2439     for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < s->skipped_bytes; j++) {
2440         if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2441             startheader--;
2442             cmpt++;
2443         }
2444     }
2445
2446     for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2447         offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2448         for (j = 0, cmpt = 0, startheader = offset
2449              + s->sh.entry_point_offset[i]; j < s->skipped_bytes; j++) {
2450             if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2451                 startheader--;
2452                 cmpt++;
2453             }
2454         }
2455         s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2456         s->sh.offset[i - 1] = offset;
2457
2458     }
2459     if (s->sh.num_entry_point_offsets != 0) {
2460         offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2461         s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2462         s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
2463
2464     }
2465     s->data = nal;
2466
2467     for (i = 1; i < s->threads_number; i++) {
2468         s->sList[i]->HEVClc->first_qp_group = 1;
2469         s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2470         memcpy(s->sList[i], s, sizeof(HEVCContext));
2471         s->sList[i]->HEVClc = s->HEVClcList[i];
2472     }
2473
2474     avpriv_atomic_int_set(&s->wpp_err, 0);
2475     ff_reset_entries(s->avctx);
2476
2477     for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2478         arg[i] = i;
2479         ret[i] = 0;
2480     }
2481
2482     if (s->pps->entropy_coding_sync_enabled_flag)
2483         s->avctx->execute2(s->avctx, (void *) hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2484
2485     for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
2486         res += ret[i];
2487     av_free(ret);
2488     av_free(arg);
2489     return res;
2490 }
2491
2492 /**
2493  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2494  * 0 if the unit should be skipped, 1 otherwise
2495  */
2496 static int hls_nal_unit(HEVCContext *s)
2497 {
2498     GetBitContext *gb = &s->HEVClc->gb;
2499     int nuh_layer_id;
2500
2501     if (get_bits1(gb) != 0)
2502         return AVERROR_INVALIDDATA;
2503
2504     s->nal_unit_type = get_bits(gb, 6);
2505
2506     nuh_layer_id   = get_bits(gb, 6);
2507     s->temporal_id = get_bits(gb, 3) - 1;
2508     if (s->temporal_id < 0)
2509         return AVERROR_INVALIDDATA;
2510
2511     av_log(s->avctx, AV_LOG_DEBUG,
2512            "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2513            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2514
2515     return nuh_layer_id == 0;
2516 }
2517
2518 static int set_side_data(HEVCContext *s)
2519 {
2520     AVFrame *out = s->ref->frame;
2521
2522     if (s->sei_frame_packing_present &&
2523         s->frame_packing_arrangement_type >= 3 &&
2524         s->frame_packing_arrangement_type <= 5 &&
2525         s->content_interpretation_type > 0 &&
2526         s->content_interpretation_type < 3) {
2527         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2528         if (!stereo)
2529             return AVERROR(ENOMEM);
2530
2531         switch (s->frame_packing_arrangement_type) {
2532         case 3:
2533             if (s->quincunx_subsampling)
2534                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2535             else
2536                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2537             break;
2538         case 4:
2539             stereo->type = AV_STEREO3D_TOPBOTTOM;
2540             break;
2541         case 5:
2542             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2543             break;
2544         }
2545
2546         if (s->content_interpretation_type == 2)
2547             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2548     }
2549
2550     if (s->sei_display_orientation_present &&
2551         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2552         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2553         AVFrameSideData *rotation = av_frame_new_side_data(out,
2554                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2555                                                            sizeof(int32_t) * 9);
2556         if (!rotation)
2557             return AVERROR(ENOMEM);
2558
2559         av_display_rotation_set((int32_t *)rotation->data, angle);
2560         av_display_matrix_flip((int32_t *)rotation->data,
2561                                s->sei_vflip, s->sei_hflip);
2562     }
2563
2564     return 0;
2565 }
2566
2567 static int hevc_frame_start(HEVCContext *s)
2568 {
2569     HEVCLocalContext *lc = s->HEVClc;
2570     int pic_size_in_ctb  = ((s->sps->width  >> s->sps->log2_min_cb_size) + 1) *
2571                            ((s->sps->height >> s->sps->log2_min_cb_size) + 1);
2572     int ret;
2573
2574     memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2575     memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
2576     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2577     memset(s->is_pcm,        0, (s->sps->min_pu_width + 1) * (s->sps->min_pu_height + 1));
2578     memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2579
2580     s->is_decoded        = 0;
2581     s->first_nal_type    = s->nal_unit_type;
2582
2583     if (s->pps->tiles_enabled_flag)
2584         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2585
2586     ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2587     if (ret < 0)
2588         goto fail;
2589
2590     ret = ff_hevc_frame_rps(s);
2591     if (ret < 0) {
2592         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2593         goto fail;
2594     }
2595
2596     s->ref->frame->key_frame = IS_IRAP(s);
2597
2598     ret = set_side_data(s);
2599     if (ret < 0)
2600         goto fail;
2601
2602     s->frame->pict_type = 3 - s->sh.slice_type;
2603
2604     if (!IS_IRAP(s))
2605         ff_hevc_bump_frame(s);
2606
2607     av_frame_unref(s->output_frame);
2608     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2609     if (ret < 0)
2610         goto fail;
2611
2612     ff_thread_finish_setup(s->avctx);
2613
2614     return 0;
2615
2616 fail:
2617     if (s->ref && s->threads_type == FF_THREAD_FRAME)
2618         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2619     s->ref = NULL;
2620     return ret;
2621 }
2622
2623 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2624 {
2625     HEVCLocalContext *lc = s->HEVClc;
2626     GetBitContext *gb    = &lc->gb;
2627     int ctb_addr_ts, ret;
2628
2629     ret = init_get_bits8(gb, nal, length);
2630     if (ret < 0)
2631         return ret;
2632
2633     ret = hls_nal_unit(s);
2634     if (ret < 0) {
2635         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2636                s->nal_unit_type);
2637         goto fail;
2638     } else if (!ret)
2639         return 0;
2640
2641     switch (s->nal_unit_type) {
2642     case NAL_VPS:
2643         ret = ff_hevc_decode_nal_vps(s);
2644         if (ret < 0)
2645             goto fail;
2646         break;
2647     case NAL_SPS:
2648         ret = ff_hevc_decode_nal_sps(s);
2649         if (ret < 0)
2650             goto fail;
2651         break;
2652     case NAL_PPS:
2653         ret = ff_hevc_decode_nal_pps(s);
2654         if (ret < 0)
2655             goto fail;
2656         break;
2657     case NAL_SEI_PREFIX:
2658     case NAL_SEI_SUFFIX:
2659         ret = ff_hevc_decode_nal_sei(s);
2660         if (ret < 0)
2661             goto fail;
2662         break;
2663     case NAL_TRAIL_R:
2664     case NAL_TRAIL_N:
2665     case NAL_TSA_N:
2666     case NAL_TSA_R:
2667     case NAL_STSA_N:
2668     case NAL_STSA_R:
2669     case NAL_BLA_W_LP:
2670     case NAL_BLA_W_RADL:
2671     case NAL_BLA_N_LP:
2672     case NAL_IDR_W_RADL:
2673     case NAL_IDR_N_LP:
2674     case NAL_CRA_NUT:
2675     case NAL_RADL_N:
2676     case NAL_RADL_R:
2677     case NAL_RASL_N:
2678     case NAL_RASL_R:
2679         ret = hls_slice_header(s);
2680         if (ret < 0)
2681             return ret;
2682
2683         if (s->max_ra == INT_MAX) {
2684             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2685                 s->max_ra = s->poc;
2686             } else {
2687                 if (IS_IDR(s))
2688                     s->max_ra = INT_MIN;
2689             }
2690         }
2691
2692         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2693             s->poc <= s->max_ra) {
2694             s->is_decoded = 0;
2695             break;
2696         } else {
2697             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2698                 s->max_ra = INT_MIN;
2699         }
2700
2701         if (s->sh.first_slice_in_pic_flag) {
2702             ret = hevc_frame_start(s);
2703             if (ret < 0)
2704                 return ret;
2705         } else if (!s->ref) {
2706             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2707             goto fail;
2708         }
2709
2710         if (s->nal_unit_type != s->first_nal_type) {
2711             av_log(s->avctx, AV_LOG_ERROR,
2712                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2713                    s->first_nal_type, s->nal_unit_type);
2714             return AVERROR_INVALIDDATA;
2715         }
2716
2717         if (!s->sh.dependent_slice_segment_flag &&
2718             s->sh.slice_type != I_SLICE) {
2719             ret = ff_hevc_slice_rpl(s);
2720             if (ret < 0) {
2721                 av_log(s->avctx, AV_LOG_WARNING,
2722                        "Error constructing the reference lists for the current slice.\n");
2723                 goto fail;
2724             }
2725         }
2726
2727         if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2728             ctb_addr_ts = hls_slice_data_wpp(s, nal, length);
2729         else
2730             ctb_addr_ts = hls_slice_data(s);
2731         if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2732             s->is_decoded = 1;
2733         }
2734
2735         if (ctb_addr_ts < 0) {
2736             ret = ctb_addr_ts;
2737             goto fail;
2738         }
2739         break;
2740     case NAL_EOS_NUT:
2741     case NAL_EOB_NUT:
2742         s->seq_decode = (s->seq_decode + 1) & 0xff;
2743         s->max_ra     = INT_MAX;
2744         break;
2745     case NAL_AUD:
2746     case NAL_FD_NUT:
2747         break;
2748     default:
2749         av_log(s->avctx, AV_LOG_INFO,
2750                "Skipping NAL unit %d\n", s->nal_unit_type);
2751     }
2752
2753     return 0;
2754 fail:
2755     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2756         return ret;
2757     return 0;
2758 }
2759
2760 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2761  * between these functions would be nice. */
2762 int ff_hevc_extract_rbsp(HEVCContext *s, const uint8_t *src, int length,
2763                          HEVCNAL *nal)
2764 {
2765     int i, si, di;
2766     uint8_t *dst;
2767
2768     s->skipped_bytes = 0;
2769 #define STARTCODE_TEST                                                  \
2770         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
2771             if (src[i + 2] != 3) {                                      \
2772                 /* startcode, so we must be past the end */             \
2773                 length = i;                                             \
2774             }                                                           \
2775             break;                                                      \
2776         }
2777 #if HAVE_FAST_UNALIGNED
2778 #define FIND_FIRST_ZERO                                                 \
2779         if (i > 0 && !src[i])                                           \
2780             i--;                                                        \
2781         while (src[i])                                                  \
2782             i++
2783 #if HAVE_FAST_64BIT
2784     for (i = 0; i + 1 < length; i += 9) {
2785         if (!((~AV_RN64A(src + i) &
2786                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2787               0x8000800080008080ULL))
2788             continue;
2789         FIND_FIRST_ZERO;
2790         STARTCODE_TEST;
2791         i -= 7;
2792     }
2793 #else
2794     for (i = 0; i + 1 < length; i += 5) {
2795         if (!((~AV_RN32A(src + i) &
2796                (AV_RN32A(src + i) - 0x01000101U)) &
2797               0x80008080U))
2798             continue;
2799         FIND_FIRST_ZERO;
2800         STARTCODE_TEST;
2801         i -= 3;
2802     }
2803 #endif /* HAVE_FAST_64BIT */
2804 #else
2805     for (i = 0; i + 1 < length; i += 2) {
2806         if (src[i])
2807             continue;
2808         if (i > 0 && src[i - 1] == 0)
2809             i--;
2810         STARTCODE_TEST;
2811     }
2812 #endif /* HAVE_FAST_UNALIGNED */
2813
2814     if (i >= length - 1) { // no escaped 0
2815         nal->data = src;
2816         nal->size = length;
2817         return length;
2818     }
2819
2820     av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2821                    length + FF_INPUT_BUFFER_PADDING_SIZE);
2822     if (!nal->rbsp_buffer)
2823         return AVERROR(ENOMEM);
2824
2825     dst = nal->rbsp_buffer;
2826
2827     memcpy(dst, src, i);
2828     si = di = i;
2829     while (si + 2 < length) {
2830         // remove escapes (very rare 1:2^22)
2831         if (src[si + 2] > 3) {
2832             dst[di++] = src[si++];
2833             dst[di++] = src[si++];
2834         } else if (src[si] == 0 && src[si + 1] == 0) {
2835             if (src[si + 2] == 3) { // escape
2836                 dst[di++] = 0;
2837                 dst[di++] = 0;
2838                 si       += 3;
2839
2840                 s->skipped_bytes++;
2841                 if (s->skipped_bytes_pos_size < s->skipped_bytes) {
2842                     s->skipped_bytes_pos_size *= 2;
2843                     av_reallocp_array(&s->skipped_bytes_pos,
2844                             s->skipped_bytes_pos_size,
2845                             sizeof(*s->skipped_bytes_pos));
2846                     if (!s->skipped_bytes_pos)
2847                         return AVERROR(ENOMEM);
2848                 }
2849                 if (s->skipped_bytes_pos)
2850                     s->skipped_bytes_pos[s->skipped_bytes-1] = di - 1;
2851                 continue;
2852             } else // next start code
2853                 goto nsc;
2854         }
2855
2856         dst[di++] = src[si++];
2857     }
2858     while (si < length)
2859         dst[di++] = src[si++];
2860
2861 nsc:
2862     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2863
2864     nal->data = dst;
2865     nal->size = di;
2866     return si;
2867 }
2868
2869 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2870 {
2871     int i, consumed, ret = 0;
2872
2873     s->ref = NULL;
2874     s->last_eos = s->eos;
2875     s->eos = 0;
2876
2877     /* split the input packet into NAL units, so we know the upper bound on the
2878      * number of slices in the frame */
2879     s->nb_nals = 0;
2880     while (length >= 4) {
2881         HEVCNAL *nal;
2882         int extract_length = 0;
2883
2884         if (s->is_nalff) {
2885             int i;
2886             for (i = 0; i < s->nal_length_size; i++)
2887                 extract_length = (extract_length << 8) | buf[i];
2888             buf    += s->nal_length_size;
2889             length -= s->nal_length_size;
2890
2891             if (extract_length > length) {
2892                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2893                 ret = AVERROR_INVALIDDATA;
2894                 goto fail;
2895             }
2896         } else {
2897             /* search start code */
2898             while (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2899                 ++buf;
2900                 --length;
2901                 if (length < 4) {
2902                     av_log(s->avctx, AV_LOG_ERROR, "No start code is found.\n");
2903                     ret = AVERROR_INVALIDDATA;
2904                     goto fail;
2905                 }
2906             }
2907
2908             buf           += 3;
2909             length        -= 3;
2910         }
2911
2912         if (!s->is_nalff)
2913             extract_length = length;
2914
2915         if (s->nals_allocated < s->nb_nals + 1) {
2916             int new_size = s->nals_allocated + 1;
2917             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2918             if (!tmp) {
2919                 ret = AVERROR(ENOMEM);
2920                 goto fail;
2921             }
2922             s->nals = tmp;
2923             memset(s->nals + s->nals_allocated, 0,
2924                    (new_size - s->nals_allocated) * sizeof(*tmp));
2925             av_reallocp_array(&s->skipped_bytes_nal, new_size, sizeof(*s->skipped_bytes_nal));
2926             av_reallocp_array(&s->skipped_bytes_pos_size_nal, new_size, sizeof(*s->skipped_bytes_pos_size_nal));
2927             av_reallocp_array(&s->skipped_bytes_pos_nal, new_size, sizeof(*s->skipped_bytes_pos_nal));
2928             s->skipped_bytes_pos_size_nal[s->nals_allocated] = 1024; // initial buffer size
2929             s->skipped_bytes_pos_nal[s->nals_allocated] = av_malloc_array(s->skipped_bytes_pos_size_nal[s->nals_allocated], sizeof(*s->skipped_bytes_pos));
2930             s->nals_allocated = new_size;
2931         }
2932         s->skipped_bytes_pos_size = s->skipped_bytes_pos_size_nal[s->nb_nals];
2933         s->skipped_bytes_pos = s->skipped_bytes_pos_nal[s->nb_nals];
2934         nal = &s->nals[s->nb_nals];
2935
2936         consumed = ff_hevc_extract_rbsp(s, buf, extract_length, nal);
2937
2938         s->skipped_bytes_nal[s->nb_nals] = s->skipped_bytes;
2939         s->skipped_bytes_pos_size_nal[s->nb_nals] = s->skipped_bytes_pos_size;
2940         s->skipped_bytes_pos_nal[s->nb_nals++] = s->skipped_bytes_pos;
2941
2942
2943         if (consumed < 0) {
2944             ret = consumed;
2945             goto fail;
2946         }
2947
2948         ret = init_get_bits8(&s->HEVClc->gb, nal->data, nal->size);
2949         if (ret < 0)
2950             goto fail;
2951         hls_nal_unit(s);
2952
2953         if (s->nal_unit_type == NAL_EOB_NUT ||
2954             s->nal_unit_type == NAL_EOS_NUT)
2955             s->eos = 1;
2956
2957         buf    += consumed;
2958         length -= consumed;
2959     }
2960
2961     /* parse the NAL units */
2962     for (i = 0; i < s->nb_nals; i++) {
2963         int ret;
2964         s->skipped_bytes = s->skipped_bytes_nal[i];
2965         s->skipped_bytes_pos = s->skipped_bytes_pos_nal[i];
2966
2967         ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2968         if (ret < 0) {
2969             av_log(s->avctx, AV_LOG_WARNING,
2970                    "Error parsing NAL unit #%d.\n", i);
2971             goto fail;
2972         }
2973     }
2974
2975 fail:
2976     if (s->ref && s->threads_type == FF_THREAD_FRAME)
2977         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2978
2979     return ret;
2980 }
2981
2982 static void print_md5(void *log_ctx, int level, uint8_t md5[16])
2983 {
2984     int i;
2985     for (i = 0; i < 16; i++)
2986         av_log(log_ctx, level, "%02"PRIx8, md5[i]);
2987 }
2988
2989 static int verify_md5(HEVCContext *s, AVFrame *frame)
2990 {
2991     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2992     int pixel_shift;
2993     int i, j;
2994
2995     if (!desc)
2996         return AVERROR(EINVAL);
2997
2998     pixel_shift = desc->comp[0].depth_minus1 > 7;
2999
3000     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
3001            s->poc);
3002
3003     /* the checksums are LE, so we have to byteswap for >8bpp formats
3004      * on BE arches */
3005 #if HAVE_BIGENDIAN
3006     if (pixel_shift && !s->checksum_buf) {
3007         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
3008                        FFMAX3(frame->linesize[0], frame->linesize[1],
3009                               frame->linesize[2]));
3010         if (!s->checksum_buf)
3011             return AVERROR(ENOMEM);
3012     }
3013 #endif
3014
3015     for (i = 0; frame->data[i]; i++) {
3016         int width  = s->avctx->coded_width;
3017         int height = s->avctx->coded_height;
3018         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
3019         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
3020         uint8_t md5[16];
3021
3022         av_md5_init(s->md5_ctx);
3023         for (j = 0; j < h; j++) {
3024             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
3025 #if HAVE_BIGENDIAN
3026             if (pixel_shift) {
3027                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
3028                                     (const uint16_t *) src, w);
3029                 src = s->checksum_buf;
3030             }
3031 #endif
3032             av_md5_update(s->md5_ctx, src, w << pixel_shift);
3033         }
3034         av_md5_final(s->md5_ctx, md5);
3035
3036         if (!memcmp(md5, s->md5[i], 16)) {
3037             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
3038             print_md5(s->avctx, AV_LOG_DEBUG, md5);
3039             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
3040         } else {
3041             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
3042             print_md5(s->avctx, AV_LOG_ERROR, md5);
3043             av_log   (s->avctx, AV_LOG_ERROR, " != ");
3044             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
3045             av_log   (s->avctx, AV_LOG_ERROR, "\n");
3046             return AVERROR_INVALIDDATA;
3047         }
3048     }
3049
3050     av_log(s->avctx, AV_LOG_DEBUG, "\n");
3051
3052     return 0;
3053 }
3054
3055 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
3056                              AVPacket *avpkt)
3057 {
3058     int ret;
3059     HEVCContext *s = avctx->priv_data;
3060
3061     if (!avpkt->size) {
3062         ret = ff_hevc_output_frame(s, data, 1);
3063         if (ret < 0)
3064             return ret;
3065
3066         *got_output = ret;
3067         return 0;
3068     }
3069
3070     s->ref = NULL;
3071     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
3072     if (ret < 0)
3073         return ret;
3074
3075     /* verify the SEI checksum */
3076     if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
3077         s->is_md5) {
3078         ret = verify_md5(s, s->ref->frame);
3079         if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
3080             ff_hevc_unref_frame(s, s->ref, ~0);
3081             return ret;
3082         }
3083     }
3084     s->is_md5 = 0;
3085
3086     if (s->is_decoded) {
3087         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
3088         s->is_decoded = 0;
3089     }
3090
3091     if (s->output_frame->buf[0]) {
3092         av_frame_move_ref(data, s->output_frame);
3093         *got_output = 1;
3094     }
3095
3096     return avpkt->size;
3097 }
3098
3099 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
3100 {
3101     int ret;
3102
3103     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
3104     if (ret < 0)
3105         return ret;
3106
3107     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
3108     if (!dst->tab_mvf_buf)
3109         goto fail;
3110     dst->tab_mvf = src->tab_mvf;
3111
3112     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
3113     if (!dst->rpl_tab_buf)
3114         goto fail;
3115     dst->rpl_tab = src->rpl_tab;
3116
3117     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
3118     if (!dst->rpl_buf)
3119         goto fail;
3120
3121     dst->poc        = src->poc;
3122     dst->ctb_count  = src->ctb_count;
3123     dst->window     = src->window;
3124     dst->flags      = src->flags;
3125     dst->sequence   = src->sequence;
3126
3127     return 0;
3128 fail:
3129     ff_hevc_unref_frame(s, dst, ~0);
3130     return AVERROR(ENOMEM);
3131 }
3132
3133 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3134 {
3135     HEVCContext       *s = avctx->priv_data;
3136     HEVCLocalContext *lc = s->HEVClc;
3137     int i;
3138
3139     pic_arrays_free(s);
3140
3141     av_freep(&s->md5_ctx);
3142
3143     for(i=0; i < s->nals_allocated; i++) {
3144         av_freep(&s->skipped_bytes_pos_nal[i]);
3145     }
3146     av_freep(&s->skipped_bytes_pos_size_nal);
3147     av_freep(&s->skipped_bytes_nal);
3148     av_freep(&s->skipped_bytes_pos_nal);
3149
3150     av_freep(&s->cabac_state);
3151
3152     av_frame_free(&s->tmp_frame);
3153     av_frame_free(&s->output_frame);
3154
3155     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3156         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3157         av_frame_free(&s->DPB[i].frame);
3158     }
3159
3160     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
3161         av_buffer_unref(&s->vps_list[i]);
3162     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
3163         av_buffer_unref(&s->sps_list[i]);
3164     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
3165         av_buffer_unref(&s->pps_list[i]);
3166     s->sps = NULL;
3167     s->pps = NULL;
3168     s->vps = NULL;
3169
3170     av_buffer_unref(&s->current_sps);
3171
3172     av_freep(&s->sh.entry_point_offset);
3173     av_freep(&s->sh.offset);
3174     av_freep(&s->sh.size);
3175
3176     for (i = 1; i < s->threads_number; i++) {
3177         lc = s->HEVClcList[i];
3178         if (lc) {
3179             av_freep(&s->HEVClcList[i]);
3180             av_freep(&s->sList[i]);
3181         }
3182     }
3183     if (s->HEVClc == s->HEVClcList[0])
3184         s->HEVClc = NULL;
3185     av_freep(&s->HEVClcList[0]);
3186
3187     for (i = 0; i < s->nals_allocated; i++)
3188         av_freep(&s->nals[i].rbsp_buffer);
3189     av_freep(&s->nals);
3190     s->nals_allocated = 0;
3191
3192     return 0;
3193 }
3194
3195 static av_cold int hevc_init_context(AVCodecContext *avctx)
3196 {
3197     HEVCContext *s = avctx->priv_data;
3198     int i;
3199
3200     s->avctx = avctx;
3201
3202     s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3203     if (!s->HEVClc)
3204         goto fail;
3205     s->HEVClcList[0] = s->HEVClc;
3206     s->sList[0] = s;
3207
3208     s->cabac_state = av_malloc(HEVC_CONTEXTS);
3209     if (!s->cabac_state)
3210         goto fail;
3211
3212     s->tmp_frame = av_frame_alloc();
3213     if (!s->tmp_frame)
3214         goto fail;
3215
3216     s->output_frame = av_frame_alloc();
3217     if (!s->output_frame)
3218         goto fail;
3219
3220     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3221         s->DPB[i].frame = av_frame_alloc();
3222         if (!s->DPB[i].frame)
3223             goto fail;
3224         s->DPB[i].tf.f = s->DPB[i].frame;
3225     }
3226
3227     s->max_ra = INT_MAX;
3228
3229     s->md5_ctx = av_md5_alloc();
3230     if (!s->md5_ctx)
3231         goto fail;
3232
3233     ff_bswapdsp_init(&s->bdsp);
3234
3235     s->context_initialized = 1;
3236     s->eos = 0;
3237
3238     return 0;
3239
3240 fail:
3241     hevc_decode_free(avctx);
3242     return AVERROR(ENOMEM);
3243 }
3244
3245 static int hevc_update_thread_context(AVCodecContext *dst,
3246                                       const AVCodecContext *src)
3247 {
3248     HEVCContext *s  = dst->priv_data;
3249     HEVCContext *s0 = src->priv_data;
3250     int i, ret;
3251
3252     if (!s->context_initialized) {
3253         ret = hevc_init_context(dst);
3254         if (ret < 0)
3255             return ret;
3256     }
3257
3258     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3259         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3260         if (s0->DPB[i].frame->buf[0]) {
3261             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3262             if (ret < 0)
3263                 return ret;
3264         }
3265     }
3266
3267     if (s->sps != s0->sps)
3268         s->sps = NULL;
3269     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3270         av_buffer_unref(&s->vps_list[i]);
3271         if (s0->vps_list[i]) {
3272             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3273             if (!s->vps_list[i])
3274                 return AVERROR(ENOMEM);
3275         }
3276     }
3277
3278     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3279         av_buffer_unref(&s->sps_list[i]);
3280         if (s0->sps_list[i]) {
3281             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3282             if (!s->sps_list[i])
3283                 return AVERROR(ENOMEM);
3284         }
3285     }
3286
3287     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3288         av_buffer_unref(&s->pps_list[i]);
3289         if (s0->pps_list[i]) {
3290             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3291             if (!s->pps_list[i])
3292                 return AVERROR(ENOMEM);
3293         }
3294     }
3295
3296     av_buffer_unref(&s->current_sps);
3297     if (s0->current_sps) {
3298         s->current_sps = av_buffer_ref(s0->current_sps);
3299         if (!s->current_sps)
3300             return AVERROR(ENOMEM);
3301     }
3302
3303     if (s->sps != s0->sps)
3304         if ((ret = set_sps(s, s0->sps)) < 0)
3305             return ret;
3306
3307     s->seq_decode = s0->seq_decode;
3308     s->seq_output = s0->seq_output;
3309     s->pocTid0    = s0->pocTid0;
3310     s->max_ra     = s0->max_ra;
3311     s->eos        = s0->eos;
3312
3313     s->is_nalff        = s0->is_nalff;
3314     s->nal_length_size = s0->nal_length_size;
3315
3316     s->threads_number      = s0->threads_number;
3317     s->threads_type        = s0->threads_type;
3318
3319     if (s0->eos) {
3320         s->seq_decode = (s->seq_decode + 1) & 0xff;
3321         s->max_ra = INT_MAX;
3322     }
3323
3324     return 0;
3325 }
3326
3327 static int hevc_decode_extradata(HEVCContext *s)
3328 {
3329     AVCodecContext *avctx = s->avctx;
3330     GetByteContext gb;
3331     int ret;
3332
3333     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3334
3335     if (avctx->extradata_size > 3 &&
3336         (avctx->extradata[0] || avctx->extradata[1] ||
3337          avctx->extradata[2] > 1)) {
3338         /* It seems the extradata is encoded as hvcC format.
3339          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3340          * is finalized. When finalized, configurationVersion will be 1 and we
3341          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3342         int i, j, num_arrays, nal_len_size;
3343
3344         s->is_nalff = 1;
3345
3346         bytestream2_skip(&gb, 21);
3347         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3348         num_arrays   = bytestream2_get_byte(&gb);
3349
3350         /* nal units in the hvcC always have length coded with 2 bytes,
3351          * so put a fake nal_length_size = 2 while parsing them */
3352         s->nal_length_size = 2;
3353
3354         /* Decode nal units from hvcC. */
3355         for (i = 0; i < num_arrays; i++) {
3356             int type = bytestream2_get_byte(&gb) & 0x3f;
3357             int cnt  = bytestream2_get_be16(&gb);
3358
3359             for (j = 0; j < cnt; j++) {
3360                 // +2 for the nal size field
3361                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3362                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3363                     av_log(s->avctx, AV_LOG_ERROR,
3364                            "Invalid NAL unit size in extradata.\n");
3365                     return AVERROR_INVALIDDATA;
3366                 }
3367
3368                 ret = decode_nal_units(s, gb.buffer, nalsize);
3369                 if (ret < 0) {
3370                     av_log(avctx, AV_LOG_ERROR,
3371                            "Decoding nal unit %d %d from hvcC failed\n",
3372                            type, i);
3373                     return ret;
3374                 }
3375                 bytestream2_skip(&gb, nalsize);
3376             }
3377         }
3378
3379         /* Now store right nal length size, that will be used to parse
3380          * all other nals */
3381         s->nal_length_size = nal_len_size;
3382     } else {
3383         s->is_nalff = 0;
3384         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3385         if (ret < 0)
3386             return ret;
3387     }
3388     return 0;
3389 }
3390
3391 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3392 {
3393     HEVCContext *s = avctx->priv_data;
3394     int ret;
3395
3396     ff_init_cabac_states();
3397
3398     avctx->internal->allocate_progress = 1;
3399
3400     ret = hevc_init_context(avctx);
3401     if (ret < 0)
3402         return ret;
3403
3404     s->enable_parallel_tiles = 0;
3405     s->picture_struct = 0;
3406
3407     if(avctx->active_thread_type & FF_THREAD_SLICE)
3408         s->threads_number = avctx->thread_count;
3409     else
3410         s->threads_number = 1;
3411
3412     if (avctx->extradata_size > 0 && avctx->extradata) {
3413         ret = hevc_decode_extradata(s);
3414         if (ret < 0) {
3415             hevc_decode_free(avctx);
3416             return ret;
3417         }
3418     }
3419
3420     if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3421             s->threads_type = FF_THREAD_FRAME;
3422         else
3423             s->threads_type = FF_THREAD_SLICE;
3424
3425     return 0;
3426 }
3427
3428 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3429 {
3430     HEVCContext *s = avctx->priv_data;
3431     int ret;
3432
3433     memset(s, 0, sizeof(*s));
3434
3435     ret = hevc_init_context(avctx);
3436     if (ret < 0)
3437         return ret;
3438
3439     return 0;
3440 }
3441
3442 static void hevc_decode_flush(AVCodecContext *avctx)
3443 {
3444     HEVCContext *s = avctx->priv_data;
3445     ff_hevc_flush_dpb(s);
3446     s->max_ra = INT_MAX;
3447 }
3448
3449 #define OFFSET(x) offsetof(HEVCContext, x)
3450 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3451
3452 static const AVProfile profiles[] = {
3453     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3454     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3455     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3456     { FF_PROFILE_HEVC_REXT,                 "Rext"  },
3457     { FF_PROFILE_UNKNOWN },
3458 };
3459
3460 static const AVOption options[] = {
3461     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3462         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3463     { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3464         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3465     { NULL },
3466 };
3467
3468 static const AVClass hevc_decoder_class = {
3469     .class_name = "HEVC decoder",
3470     .item_name  = av_default_item_name,
3471     .option     = options,
3472     .version    = LIBAVUTIL_VERSION_INT,
3473 };
3474
3475 AVCodec ff_hevc_decoder = {
3476     .name                  = "hevc",
3477     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3478     .type                  = AVMEDIA_TYPE_VIDEO,
3479     .id                    = AV_CODEC_ID_HEVC,
3480     .priv_data_size        = sizeof(HEVCContext),
3481     .priv_class            = &hevc_decoder_class,
3482     .init                  = hevc_decode_init,
3483     .close                 = hevc_decode_free,
3484     .decode                = hevc_decode_frame,
3485     .flush                 = hevc_decode_flush,
3486     .update_thread_context = hevc_update_thread_context,
3487     .init_thread_copy      = hevc_init_thread_copy,
3488     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3489                              CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
3490     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
3491 };