git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c

   1 /*
   2  * HEVC video Decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of FFmpeg.
  10  *
  11  * FFmpeg is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * FFmpeg is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with FFmpeg; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/atomic.h"
  27 #include "libavutil/attributes.h"
  28 #include "libavutil/common.h"
  29 #include "libavutil/display.h"
  30 #include "libavutil/internal.h"
  31 #include "libavutil/md5.h"
  32 #include "libavutil/opt.h"
  33 #include "libavutil/pixdesc.h"
  34 #include "libavutil/stereo3d.h"
  35
  36 #include "bswapdsp.h"
  37 #include "bytestream.h"
  38 #include "cabac_functions.h"
  39 #include "golomb.h"
  40 #include "hevc.h"
  41
  42 const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
  43
  44 /**
  45  * NOTE: Each function hls_foo corresponds to the function foo in the
  46  * specification (HLS stands for High Level Syntax).
  47  */
  48
  49 /**
  50  * Section 5.7
  51  */
  52
  53 /* free everything allocated  by pic_arrays_init() */
  54 static void pic_arrays_free(HEVCContext *s)
  55 {
  56     av_freep(&s->sao);
  57     av_freep(&s->deblock);
  58
  59     av_freep(&s->skip_flag);
  60     av_freep(&s->tab_ct_depth);
  61
  62     av_freep(&s->tab_ipm);
  63     av_freep(&s->cbf_luma);
  64     av_freep(&s->is_pcm);
  65
  66     av_freep(&s->qp_y_tab);
  67     av_freep(&s->tab_slice_address);
  68     av_freep(&s->filter_slice_edges);
  69
  70     av_freep(&s->horizontal_bs);
  71     av_freep(&s->vertical_bs);
  72
  73     av_freep(&s->sh.entry_point_offset);
  74     av_freep(&s->sh.size);
  75     av_freep(&s->sh.offset);
  76
  77     av_buffer_pool_uninit(&s->tab_mvf_pool);
  78     av_buffer_pool_uninit(&s->rpl_tab_pool);
  79 }
  80
  81 /* allocate arrays that depend on frame dimensions */
  82 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
  83 {
  84     int log2_min_cb_size = sps->log2_min_cb_size;
  85     int width            = sps->width;
  86     int height           = sps->height;
  87     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
  88                            ((height >> log2_min_cb_size) + 1);
  89     int ctb_count        = sps->ctb_width * sps->ctb_height;
  90     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
  91
  92     s->bs_width  = width  >> 3;
  93     s->bs_height = height >> 3;
  94
  95     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
  96     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
  97     if (!s->sao || !s->deblock)
  98         goto fail;
  99
 100     s->skip_flag    = av_malloc(pic_size_in_ctb);
 101     s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
 102     if (!s->skip_flag || !s->tab_ct_depth)
 103         goto fail;
 104
 105     s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
 106     s->tab_ipm  = av_mallocz(min_pu_size);
 107     s->is_pcm   = av_malloc(min_pu_size);
 108     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 109         goto fail;
 110
 111     s->filter_slice_edges = av_malloc(ctb_count);
 112     s->tab_slice_address  = av_malloc_array(pic_size_in_ctb,
 113                                       sizeof(*s->tab_slice_address));
 114     s->qp_y_tab           = av_malloc_array(pic_size_in_ctb,
 115                                       sizeof(*s->qp_y_tab));
 116     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 117         goto fail;
 118
 119     s->horizontal_bs = av_mallocz_array(2 * s->bs_width, (s->bs_height + 1));
 120     s->vertical_bs   = av_mallocz_array(2 * s->bs_width, (s->bs_height + 1));
 121     if (!s->horizontal_bs || !s->vertical_bs)
 122         goto fail;
 123
 124     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 125                                           av_buffer_allocz);
 126     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 127                                           av_buffer_allocz);
 128     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 129         goto fail;
 130
 131     return 0;
 132
 133 fail:
 134     pic_arrays_free(s);
 135     return AVERROR(ENOMEM);
 136 }
 137
 138 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 139 {
 140     int i = 0;
 141     int j = 0;
 142     uint8_t luma_weight_l0_flag[16];
 143     uint8_t chroma_weight_l0_flag[16];
 144     uint8_t luma_weight_l1_flag[16];
 145     uint8_t chroma_weight_l1_flag[16];
 146
 147     s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
 148     if (s->sps->chroma_format_idc != 0) {
 149         int delta = get_se_golomb(gb);
 150         s->sh.chroma_log2_weight_denom = av_clip(s->sh.luma_log2_weight_denom + delta, 0, 7);
 151     }
 152
 153     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 154         luma_weight_l0_flag[i] = get_bits1(gb);
 155         if (!luma_weight_l0_flag[i]) {
 156             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 157             s->sh.luma_offset_l0[i] = 0;
 158         }
 159     }
 160     if (s->sps->chroma_format_idc != 0) {
 161         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 162             chroma_weight_l0_flag[i] = get_bits1(gb);
 163     } else {
 164         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 165             chroma_weight_l0_flag[i] = 0;
 166     }
 167     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 168         if (luma_weight_l0_flag[i]) {
 169             int delta_luma_weight_l0 = get_se_golomb(gb);
 170             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 171             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 172         }
 173         if (chroma_weight_l0_flag[i]) {
 174             for (j = 0; j < 2; j++) {
 175                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 176                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 177                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 178                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 179                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 180             }
 181         } else {
 182             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 183             s->sh.chroma_offset_l0[i][0] = 0;
 184             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 185             s->sh.chroma_offset_l0[i][1] = 0;
 186         }
 187     }
 188     if (s->sh.slice_type == B_SLICE) {
 189         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 190             luma_weight_l1_flag[i] = get_bits1(gb);
 191             if (!luma_weight_l1_flag[i]) {
 192                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 193                 s->sh.luma_offset_l1[i] = 0;
 194             }
 195         }
 196         if (s->sps->chroma_format_idc != 0) {
 197             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 198                 chroma_weight_l1_flag[i] = get_bits1(gb);
 199         } else {
 200             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 201                 chroma_weight_l1_flag[i] = 0;
 202         }
 203         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 204             if (luma_weight_l1_flag[i]) {
 205                 int delta_luma_weight_l1 = get_se_golomb(gb);
 206                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 207                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 208             }
 209             if (chroma_weight_l1_flag[i]) {
 210                 for (j = 0; j < 2; j++) {
 211                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 212                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 213                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 214                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 215                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 216                 }
 217             } else {
 218                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 219                 s->sh.chroma_offset_l1[i][0] = 0;
 220                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 221                 s->sh.chroma_offset_l1[i][1] = 0;
 222             }
 223         }
 224     }
 225 }
 226
 227 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 228 {
 229     const HEVCSPS *sps = s->sps;
 230     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 231     int prev_delta_msb = 0;
 232     unsigned int nb_sps = 0, nb_sh;
 233     int i;
 234
 235     rps->nb_refs = 0;
 236     if (!sps->long_term_ref_pics_present_flag)
 237         return 0;
 238
 239     if (sps->num_long_term_ref_pics_sps > 0)
 240         nb_sps = get_ue_golomb_long(gb);
 241     nb_sh = get_ue_golomb_long(gb);
 242
 243     if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
 244         return AVERROR_INVALIDDATA;
 245
 246     rps->nb_refs = nb_sh + nb_sps;
 247
 248     for (i = 0; i < rps->nb_refs; i++) {
 249         uint8_t delta_poc_msb_present;
 250
 251         if (i < nb_sps) {
 252             uint8_t lt_idx_sps = 0;
 253
 254             if (sps->num_long_term_ref_pics_sps > 1)
 255                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 256
 257             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 258             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 259         } else {
 260             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 261             rps->used[i] = get_bits1(gb);
 262         }
 263
 264         delta_poc_msb_present = get_bits1(gb);
 265         if (delta_poc_msb_present) {
 266             int delta = get_ue_golomb_long(gb);
 267
 268             if (i && i != nb_sps)
 269                 delta += prev_delta_msb;
 270
 271             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 272             prev_delta_msb = delta;
 273         }
 274     }
 275
 276     return 0;
 277 }
 278
 279 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
 280 {
 281     int ret;
 282     unsigned int num = 0, den = 0;
 283
 284     pic_arrays_free(s);
 285     ret = pic_arrays_init(s, sps);
 286     if (ret < 0)
 287         goto fail;
 288
 289     s->avctx->coded_width         = sps->width;
 290     s->avctx->coded_height        = sps->height;
 291     s->avctx->width               = sps->output_width;
 292     s->avctx->height              = sps->output_height;
 293     s->avctx->pix_fmt             = sps->pix_fmt;
 294     s->avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 295
 296     ff_set_sar(s->avctx, sps->vui.sar);
 297
 298     if (sps->vui.video_signal_type_present_flag)
 299         s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 300                                                                : AVCOL_RANGE_MPEG;
 301     else
 302         s->avctx->color_range = AVCOL_RANGE_MPEG;
 303
 304     if (sps->vui.colour_description_present_flag) {
 305         s->avctx->color_primaries = sps->vui.colour_primaries;
 306         s->avctx->color_trc       = sps->vui.transfer_characteristic;
 307         s->avctx->colorspace      = sps->vui.matrix_coeffs;
 308     } else {
 309         s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 310         s->avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 311         s->avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 312     }
 313
 314     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 315     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 316     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 317
 318     if (sps->sao_enabled) {
 319         av_frame_unref(s->tmp_frame);
 320         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
 321         if (ret < 0)
 322             goto fail;
 323         s->frame = s->tmp_frame;
 324     }
 325
 326     s->sps = sps;
 327     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
 328
 329     if (s->vps->vps_timing_info_present_flag) {
 330         num = s->vps->vps_num_units_in_tick;
 331         den = s->vps->vps_time_scale;
 332     } else if (sps->vui.vui_timing_info_present_flag) {
 333         num = sps->vui.vui_num_units_in_tick;
 334         den = sps->vui.vui_time_scale;
 335     }
 336
 337     if (num != 0 && den != 0)
 338         av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
 339                   num, den, 1 << 30);
 340
 341     return 0;
 342
 343 fail:
 344     pic_arrays_free(s);
 345     s->sps = NULL;
 346     return ret;
 347 }
 348
 349 static int is_sps_exist(HEVCContext *s, const HEVCSPS* last_sps)
 350 {
 351     int i;
 352
 353     for( i = 0; i < MAX_SPS_COUNT; i++)
 354         if(s->sps_list[i])
 355             if (last_sps == (HEVCSPS*)s->sps_list[i]->data)
 356                 return 1;
 357     return 0;
 358 }
 359
 360 static int hls_slice_header(HEVCContext *s)
 361 {
 362     GetBitContext *gb = &s->HEVClc->gb;
 363     SliceHeader *sh   = &s->sh;
 364     int i, j, ret;
 365
 366     // Coded parameters
 367     sh->first_slice_in_pic_flag = get_bits1(gb);
 368     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 369         s->seq_decode = (s->seq_decode + 1) & 0xff;
 370         s->max_ra     = INT_MAX;
 371         if (IS_IDR(s))
 372             ff_hevc_clear_refs(s);
 373     }
 374     sh->no_output_of_prior_pics_flag = 0;
 375     if (IS_IRAP(s))
 376         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 377     if (s->nal_unit_type == NAL_CRA_NUT && s->last_eos == 1)
 378         sh->no_output_of_prior_pics_flag = 1;
 379
 380     sh->pps_id = get_ue_golomb_long(gb);
 381     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
 382         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 383         return AVERROR_INVALIDDATA;
 384     }
 385     if (!sh->first_slice_in_pic_flag &&
 386         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
 387         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 388         return AVERROR_INVALIDDATA;
 389     }
 390     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
 391
 392     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
 393         const HEVCSPS* last_sps = s->sps;
 394         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
 395         if (last_sps) {
 396             if (is_sps_exist(s, last_sps)) {
 397                 if (s->sps->width !=  last_sps->width || s->sps->height != last_sps->height ||
 398                         s->sps->temporal_layer[s->sps->max_sub_layers - 1].max_dec_pic_buffering != last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
 399                     sh->no_output_of_prior_pics_flag = 0;
 400             } else
 401                 sh->no_output_of_prior_pics_flag = 0;
 402         }
 403         ff_hevc_clear_refs(s);
 404         ret = set_sps(s, s->sps);
 405         if (ret < 0)
 406             return ret;
 407
 408         s->seq_decode = (s->seq_decode + 1) & 0xff;
 409         s->max_ra     = INT_MAX;
 410     }
 411
 412     s->avctx->profile = s->sps->ptl.general_ptl.profile_idc;
 413     s->avctx->level   = s->sps->ptl.general_ptl.level_idc;
 414
 415     sh->dependent_slice_segment_flag = 0;
 416     if (!sh->first_slice_in_pic_flag) {
 417         int slice_address_length;
 418
 419         if (s->pps->dependent_slice_segments_enabled_flag)
 420             sh->dependent_slice_segment_flag = get_bits1(gb);
 421
 422         slice_address_length = av_ceil_log2(s->sps->ctb_width *
 423                                             s->sps->ctb_height);
 424         sh->slice_segment_addr = get_bits(gb, slice_address_length);
 425         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
 426             av_log(s->avctx, AV_LOG_ERROR,
 427                    "Invalid slice segment address: %u.\n",
 428                    sh->slice_segment_addr);
 429             return AVERROR_INVALIDDATA;
 430         }
 431
 432         if (!sh->dependent_slice_segment_flag) {
 433             sh->slice_addr = sh->slice_segment_addr;
 434             s->slice_idx++;
 435         }
 436     } else {
 437         sh->slice_segment_addr = sh->slice_addr = 0;
 438         s->slice_idx           = 0;
 439         s->slice_initialized   = 0;
 440     }
 441
 442     if (!sh->dependent_slice_segment_flag) {
 443         s->slice_initialized = 0;
 444
 445         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
 446             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 447
 448         sh->slice_type = get_ue_golomb_long(gb);
 449         if (!(sh->slice_type == I_SLICE ||
 450               sh->slice_type == P_SLICE ||
 451               sh->slice_type == B_SLICE)) {
 452             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 453                    sh->slice_type);
 454             return AVERROR_INVALIDDATA;
 455         }
 456         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
 457             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 458             return AVERROR_INVALIDDATA;
 459         }
 460
 461         // when flag is not present, picture is inferred to be output
 462         sh->pic_output_flag = 1;
 463         if (s->pps->output_flag_present_flag)
 464             sh->pic_output_flag = get_bits1(gb);
 465
 466         if (s->sps->separate_colour_plane_flag)
 467             sh->colour_plane_id = get_bits(gb, 2);
 468
 469         if (!IS_IDR(s)) {
 470             int short_term_ref_pic_set_sps_flag, poc;
 471
 472             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
 473             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
 474             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 475                 av_log(s->avctx, AV_LOG_WARNING,
 476                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 477                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 478                     return AVERROR_INVALIDDATA;
 479                 poc = s->poc;
 480             }
 481             s->poc = poc;
 482
 483             short_term_ref_pic_set_sps_flag = get_bits1(gb);
 484             if (!short_term_ref_pic_set_sps_flag) {
 485                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
 486                 if (ret < 0)
 487                     return ret;
 488
 489                 sh->short_term_rps = &sh->slice_rps;
 490             } else {
 491                 int numbits, rps_idx;
 492
 493                 if (!s->sps->nb_st_rps) {
 494                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 495                     return AVERROR_INVALIDDATA;
 496                 }
 497
 498                 numbits = av_ceil_log2(s->sps->nb_st_rps);
 499                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 500                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
 501             }
 502
 503             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 504             if (ret < 0) {
 505                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 506                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 507                     return AVERROR_INVALIDDATA;
 508             }
 509
 510             if (s->sps->sps_temporal_mvp_enabled_flag)
 511                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 512             else
 513                 sh->slice_temporal_mvp_enabled_flag = 0;
 514         } else {
 515             s->sh.short_term_rps = NULL;
 516             s->poc               = 0;
 517         }
 518
 519         /* 8.3.1 */
 520         if (s->temporal_id == 0 &&
 521             s->nal_unit_type != NAL_TRAIL_N &&
 522             s->nal_unit_type != NAL_TSA_N   &&
 523             s->nal_unit_type != NAL_STSA_N  &&
 524             s->nal_unit_type != NAL_RADL_N  &&
 525             s->nal_unit_type != NAL_RADL_R  &&
 526             s->nal_unit_type != NAL_RASL_N  &&
 527             s->nal_unit_type != NAL_RASL_R)
 528             s->pocTid0 = s->poc;
 529
 530         if (s->sps->sao_enabled) {
 531             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 532             sh->slice_sample_adaptive_offset_flag[1] =
 533             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 534         } else {
 535             sh->slice_sample_adaptive_offset_flag[0] = 0;
 536             sh->slice_sample_adaptive_offset_flag[1] = 0;
 537             sh->slice_sample_adaptive_offset_flag[2] = 0;
 538         }
 539
 540         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 541         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
 542             int nb_refs;
 543
 544             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
 545             if (sh->slice_type == B_SLICE)
 546                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
 547
 548             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 549                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 550                 if (sh->slice_type == B_SLICE)
 551                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 552             }
 553             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
 554                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 555                        sh->nb_refs[L0], sh->nb_refs[L1]);
 556                 return AVERROR_INVALIDDATA;
 557             }
 558
 559             sh->rpl_modification_flag[0] = 0;
 560             sh->rpl_modification_flag[1] = 0;
 561             nb_refs = ff_hevc_frame_nb_refs(s);
 562             if (!nb_refs) {
 563                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 564                 return AVERROR_INVALIDDATA;
 565             }
 566
 567             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
 568                 sh->rpl_modification_flag[0] = get_bits1(gb);
 569                 if (sh->rpl_modification_flag[0]) {
 570                     for (i = 0; i < sh->nb_refs[L0]; i++)
 571                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 572                 }
 573
 574                 if (sh->slice_type == B_SLICE) {
 575                     sh->rpl_modification_flag[1] = get_bits1(gb);
 576                     if (sh->rpl_modification_flag[1] == 1)
 577                         for (i = 0; i < sh->nb_refs[L1]; i++)
 578                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 579                 }
 580             }
 581
 582             if (sh->slice_type == B_SLICE)
 583                 sh->mvd_l1_zero_flag = get_bits1(gb);
 584
 585             if (s->pps->cabac_init_present_flag)
 586                 sh->cabac_init_flag = get_bits1(gb);
 587             else
 588                 sh->cabac_init_flag = 0;
 589
 590             sh->collocated_ref_idx = 0;
 591             if (sh->slice_temporal_mvp_enabled_flag) {
 592                 sh->collocated_list = L0;
 593                 if (sh->slice_type == B_SLICE)
 594                     sh->collocated_list = !get_bits1(gb);
 595
 596                 if (sh->nb_refs[sh->collocated_list] > 1) {
 597                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 598                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 599                         av_log(s->avctx, AV_LOG_ERROR,
 600                                "Invalid collocated_ref_idx: %d.\n",
 601                                sh->collocated_ref_idx);
 602                         return AVERROR_INVALIDDATA;
 603                     }
 604                 }
 605             }
 606
 607             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
 608                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
 609                 pred_weight_table(s, gb);
 610             }
 611
 612             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 613             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 614                 av_log(s->avctx, AV_LOG_ERROR,
 615                        "Invalid number of merging MVP candidates: %d.\n",
 616                        sh->max_num_merge_cand);
 617                 return AVERROR_INVALIDDATA;
 618             }
 619         }
 620
 621         sh->slice_qp_delta = get_se_golomb(gb);
 622
 623         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 624             sh->slice_cb_qp_offset = get_se_golomb(gb);
 625             sh->slice_cr_qp_offset = get_se_golomb(gb);
 626         } else {
 627             sh->slice_cb_qp_offset = 0;
 628             sh->slice_cr_qp_offset = 0;
 629         }
 630
 631         if (s->pps->chroma_qp_offset_list_enabled_flag)
 632             sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
 633         else
 634             sh->cu_chroma_qp_offset_enabled_flag = 0;
 635
 636         if (s->pps->deblocking_filter_control_present_flag) {
 637             int deblocking_filter_override_flag = 0;
 638
 639             if (s->pps->deblocking_filter_override_enabled_flag)
 640                 deblocking_filter_override_flag = get_bits1(gb);
 641
 642             if (deblocking_filter_override_flag) {
 643                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 644                 if (!sh->disable_deblocking_filter_flag) {
 645                     sh->beta_offset = get_se_golomb(gb) * 2;
 646                     sh->tc_offset   = get_se_golomb(gb) * 2;
 647                 }
 648             } else {
 649                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
 650                 sh->beta_offset                    = s->pps->beta_offset;
 651                 sh->tc_offset                      = s->pps->tc_offset;
 652             }
 653         } else {
 654             sh->disable_deblocking_filter_flag = 0;
 655             sh->beta_offset                    = 0;
 656             sh->tc_offset                      = 0;
 657         }
 658
 659         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
 660             (sh->slice_sample_adaptive_offset_flag[0] ||
 661              sh->slice_sample_adaptive_offset_flag[1] ||
 662              !sh->disable_deblocking_filter_flag)) {
 663             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 664         } else {
 665             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
 666         }
 667     } else if (!s->slice_initialized) {
 668         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 669         return AVERROR_INVALIDDATA;
 670     }
 671
 672     sh->num_entry_point_offsets = 0;
 673     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
 674         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
 675         if (sh->num_entry_point_offsets > 0) {
 676             int offset_len = get_ue_golomb_long(gb) + 1;
 677             int segments = offset_len >> 4;
 678             int rest = (offset_len & 15);
 679             av_freep(&sh->entry_point_offset);
 680             av_freep(&sh->offset);
 681             av_freep(&sh->size);
 682             sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 683             sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 684             sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 685             if (!sh->entry_point_offset || !sh->offset || !sh->size) {
 686                 sh->num_entry_point_offsets = 0;
 687                 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
 688                 return AVERROR(ENOMEM);
 689             }
 690             for (i = 0; i < sh->num_entry_point_offsets; i++) {
 691                 int val = 0;
 692                 for (j = 0; j < segments; j++) {
 693                     val <<= 16;
 694                     val += get_bits(gb, 16);
 695                 }
 696                 if (rest) {
 697                     val <<= rest;
 698                     val += get_bits(gb, rest);
 699                 }
 700                 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
 701             }
 702             if (s->threads_number > 1 && (s->pps->num_tile_rows > 1 || s->pps->num_tile_columns > 1)) {
 703                 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
 704                 s->threads_number = 1;
 705             } else
 706                 s->enable_parallel_tiles = 0;
 707         } else
 708             s->enable_parallel_tiles = 0;
 709     }
 710
 711     if (s->pps->slice_header_extension_present_flag) {
 712         unsigned int length = get_ue_golomb_long(gb);
 713         if (length*8LL > get_bits_left(gb)) {
 714             av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
 715             return AVERROR_INVALIDDATA;
 716         }
 717         for (i = 0; i < length; i++)
 718             skip_bits(gb, 8);  // slice_header_extension_data_byte
 719     }
 720
 721     // Inferred parameters
 722     sh->slice_qp = 26U + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 723     if (sh->slice_qp > 51 ||
 724         sh->slice_qp < -s->sps->qp_bd_offset) {
 725         av_log(s->avctx, AV_LOG_ERROR,
 726                "The slice_qp %d is outside the valid range "
 727                "[%d, 51].\n",
 728                sh->slice_qp,
 729                -s->sps->qp_bd_offset);
 730         return AVERROR_INVALIDDATA;
 731     }
 732
 733     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 734
 735     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
 736         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
 737         return AVERROR_INVALIDDATA;
 738     }
 739
 740     s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
 741
 742     if (!s->pps->cu_qp_delta_enabled_flag)
 743         s->HEVClc->qp_y = s->sh.slice_qp;
 744
 745     s->slice_initialized = 1;
 746     s->HEVClc->tu.cu_qp_offset_cb = 0;
 747     s->HEVClc->tu.cu_qp_offset_cr = 0;
 748
 749     return 0;
 750 }
 751
 752 #define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
 753
 754 #define SET_SAO(elem, value)                            \
 755 do {                                                    \
 756     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 757         sao->elem = value;                              \
 758     else if (sao_merge_left_flag)                       \
 759         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 760     else if (sao_merge_up_flag)                         \
 761         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 762     else                                                \
 763         sao->elem = 0;                                  \
 764 } while (0)
 765
 766 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 767 {
 768     HEVCLocalContext *lc    = s->HEVClc;
 769     int sao_merge_left_flag = 0;
 770     int sao_merge_up_flag   = 0;
 771     SAOParams *sao          = &CTB(s->sao, rx, ry);
 772     int c_idx, i;
 773
 774     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 775         s->sh.slice_sample_adaptive_offset_flag[1]) {
 776         if (rx > 0) {
 777             if (lc->ctb_left_flag)
 778                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 779         }
 780         if (ry > 0 && !sao_merge_left_flag) {
 781             if (lc->ctb_up_flag)
 782                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 783         }
 784     }
 785
 786     for (c_idx = 0; c_idx < 3; c_idx++) {
 787         int log2_sao_offset_scale = c_idx == 0 ? s->pps->log2_sao_offset_scale_luma :
 788                                                  s->pps->log2_sao_offset_scale_chroma;
 789
 790         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 791             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 792             continue;
 793         }
 794
 795         if (c_idx == 2) {
 796             sao->type_idx[2] = sao->type_idx[1];
 797             sao->eo_class[2] = sao->eo_class[1];
 798         } else {
 799             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 800         }
 801
 802         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 803             continue;
 804
 805         for (i = 0; i < 4; i++)
 806             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 807
 808         if (sao->type_idx[c_idx] == SAO_BAND) {
 809             for (i = 0; i < 4; i++) {
 810                 if (sao->offset_abs[c_idx][i]) {
 811                     SET_SAO(offset_sign[c_idx][i],
 812                             ff_hevc_sao_offset_sign_decode(s));
 813                 } else {
 814                     sao->offset_sign[c_idx][i] = 0;
 815                 }
 816             }
 817             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 818         } else if (c_idx != 2) {
 819             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 820         }
 821
 822         // Inferred parameters
 823         sao->offset_val[c_idx][0] = 0;
 824         for (i = 0; i < 4; i++) {
 825             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
 826             if (sao->type_idx[c_idx] == SAO_EDGE) {
 827                 if (i > 1)
 828                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 829             } else if (sao->offset_sign[c_idx][i]) {
 830                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 831             }
 832             sao->offset_val[c_idx][i + 1] <<= log2_sao_offset_scale;
 833         }
 834     }
 835 }
 836
 837 #undef SET_SAO
 838 #undef CTB
 839
 840 static int hls_cross_component_pred(HEVCContext *s, int idx) {
 841     HEVCLocalContext *lc    = s->HEVClc;
 842     int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
 843
 844     if (log2_res_scale_abs_plus1 !=  0) {
 845         int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
 846         lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
 847                                (1 - 2 * res_scale_sign_flag);
 848     } else {
 849         lc->tu.res_scale_val = 0;
 850     }
 851
 852
 853     return 0;
 854 }
 855
 856 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
 857                               int xBase, int yBase, int cb_xBase, int cb_yBase,
 858                               int log2_cb_size, int log2_trafo_size,
 859                               int trafo_depth, int blk_idx)
 860 {
 861     HEVCLocalContext *lc = s->HEVClc;
 862     const int log2_trafo_size_c = log2_trafo_size - s->sps->hshift[1];
 863     int i;
 864
 865     if (lc->cu.pred_mode == MODE_INTRA) {
 866         int trafo_size = 1 << log2_trafo_size;
 867         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
 868
 869         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
 870     }
 871
 872     if (lc->tt.cbf_luma ||
 873         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
 874         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) ||
 875         (s->sps->chroma_format_idc == 2 &&
 876          (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (1 << log2_trafo_size_c)) ||
 877          SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (1 << log2_trafo_size_c))))) {
 878         int scan_idx   = SCAN_DIAG;
 879         int scan_idx_c = SCAN_DIAG;
 880         int cbf_luma = lc->tt.cbf_luma;
 881         int cbf_chroma = SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
 882                          SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) ||
 883                          (s->sps->chroma_format_idc == 2 &&
 884                          (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (1 << log2_trafo_size_c)) ||
 885                          SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (1 << log2_trafo_size_c))));
 886
 887         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
 888             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
 889             if (lc->tu.cu_qp_delta != 0)
 890                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
 891                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
 892             lc->tu.is_cu_qp_delta_coded = 1;
 893
 894             if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
 895                 lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
 896                 av_log(s->avctx, AV_LOG_ERROR,
 897                        "The cu_qp_delta %d is outside the valid range "
 898                        "[%d, %d].\n",
 899                        lc->tu.cu_qp_delta,
 900                        -(26 + s->sps->qp_bd_offset / 2),
 901                         (25 + s->sps->qp_bd_offset / 2));
 902                 return AVERROR_INVALIDDATA;
 903             }
 904
 905             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
 906         }
 907
 908         if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
 909             !lc->cu.cu_transquant_bypass_flag  &&  !lc->tu.is_cu_chroma_qp_offset_coded) {
 910             int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
 911             if (cu_chroma_qp_offset_flag) {
 912                 int cu_chroma_qp_offset_idx  = 0;
 913                 if (s->pps->chroma_qp_offset_list_len_minus1 > 0) {
 914                     cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
 915                     av_log(s->avctx, AV_LOG_ERROR,
 916                         "cu_chroma_qp_offset_idx not yet tested.\n");
 917                 }
 918                 lc->tu.cu_qp_offset_cb = s->pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
 919                 lc->tu.cu_qp_offset_cr = s->pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
 920             } else {
 921                 lc->tu.cu_qp_offset_cb = 0;
 922                 lc->tu.cu_qp_offset_cr = 0;
 923             }
 924             lc->tu.is_cu_chroma_qp_offset_coded = 1;
 925         }
 926
 927         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
 928             if (lc->tu.intra_pred_mode >= 6 &&
 929                 lc->tu.intra_pred_mode <= 14) {
 930                 scan_idx = SCAN_VERT;
 931             } else if (lc->tu.intra_pred_mode >= 22 &&
 932                        lc->tu.intra_pred_mode <= 30) {
 933                 scan_idx = SCAN_HORIZ;
 934             }
 935
 936             if (lc->tu.intra_pred_mode_c >=  6 &&
 937                 lc->tu.intra_pred_mode_c <= 14) {
 938                 scan_idx_c = SCAN_VERT;
 939             } else if (lc->tu.intra_pred_mode_c >= 22 &&
 940                        lc->tu.intra_pred_mode_c <= 30) {
 941                 scan_idx_c = SCAN_HORIZ;
 942             }
 943         }
 944
 945         lc->tu.cross_pf = 0;
 946
 947         if (cbf_luma)
 948             ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
 949         if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
 950             int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
 951             int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
 952             lc->tu.cross_pf  = (s->pps->cross_component_prediction_enabled_flag && cbf_luma &&
 953                                 (lc->cu.pred_mode == MODE_INTER ||
 954                                  (lc->tu.chroma_mode_c ==  4)));
 955
 956             if (lc->tu.cross_pf) {
 957                 hls_cross_component_pred(s, 0);
 958             }
 959             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
 960                 if (lc->cu.pred_mode == MODE_INTRA) {
 961                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
 962                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
 963                 }
 964                 if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (i << log2_trafo_size_c)))
 965                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
 966                                                 log2_trafo_size_c, scan_idx_c, 1);
 967                 else
 968                     if (lc->tu.cross_pf) {
 969                         ptrdiff_t stride = s->frame->linesize[1];
 970                         int hshift = s->sps->hshift[1];
 971                         int vshift = s->sps->vshift[1];
 972                         int16_t *coeffs_y = lc->tu.coeffs[0];
 973                         int16_t *coeffs =   lc->tu.coeffs[1];
 974                         int size = 1 << log2_trafo_size_c;
 975
 976                         uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
 977                                                               ((x0 >> hshift) << s->sps->pixel_shift)];
 978                         for (i = 0; i < (size * size); i++) {
 979                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
 980                         }
 981                         s->hevcdsp.transform_add[log2_trafo_size-2](dst, coeffs, stride);
 982                     }
 983             }
 984
 985             if (lc->tu.cross_pf) {
 986                 hls_cross_component_pred(s, 1);
 987             }
 988             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
 989                 if (lc->cu.pred_mode == MODE_INTRA) {
 990                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
 991                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
 992                 }
 993                 if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (i << log2_trafo_size_c)))
 994                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
 995                                                 log2_trafo_size_c, scan_idx_c, 2);
 996                 else
 997                     if (lc->tu.cross_pf) {
 998                         ptrdiff_t stride = s->frame->linesize[2];
 999                         int hshift = s->sps->hshift[2];
1000                         int vshift = s->sps->vshift[2];
1001                         int16_t *coeffs_y = lc->tu.coeffs[0];
1002                         int16_t *coeffs =   lc->tu.coeffs[1];
1003                         int size = 1 << log2_trafo_size_c;
1004
1005                         uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1006                                                           ((x0 >> hshift) << s->sps->pixel_shift)];
1007                         for (i = 0; i < (size * size); i++) {
1008                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1009                         }
1010                         s->hevcdsp.transform_add[log2_trafo_size-2](dst, coeffs, stride);
1011                     }
1012             }
1013         } else if (blk_idx == 3) {
1014             int trafo_size_h = 1 << (log2_trafo_size + 1);
1015             int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
1016             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1017                 if (lc->cu.pred_mode == MODE_INTRA) {
1018                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1019                                                     trafo_size_h, trafo_size_v);
1020                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1021                 }
1022                 if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], xBase, yBase + (i << log2_trafo_size_c)))
1023                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1024                                                 log2_trafo_size, scan_idx_c, 1);
1025             }
1026             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1027                 if (lc->cu.pred_mode == MODE_INTRA) {
1028                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1029                                                 trafo_size_h, trafo_size_v);
1030                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1031                 }
1032                 if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], xBase, yBase + (i << log2_trafo_size_c)))
1033                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1034                                                 log2_trafo_size, scan_idx_c, 2);
1035             }
1036         }
1037     } else if (lc->cu.pred_mode == MODE_INTRA) {
1038         if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
1039             int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
1040             int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
1041             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1042             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1043             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1044             if (s->sps->chroma_format_idc == 2) {
1045                 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1046                                                 trafo_size_h, trafo_size_v);
1047                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1048                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1049             }
1050         } else if (blk_idx == 3) {
1051             int trafo_size_h = 1 << (log2_trafo_size + 1);
1052             int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
1053             ff_hevc_set_neighbour_available(s, xBase, yBase,
1054                                             trafo_size_h, trafo_size_v);
1055             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1056             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1057             if (s->sps->chroma_format_idc == 2) {
1058                 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1059                                                 trafo_size_h, trafo_size_v);
1060                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1061                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
1062             }
1063         }
1064     }
1065
1066     return 0;
1067 }
1068
1069 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1070 {
1071     int cb_size          = 1 << log2_cb_size;
1072     int log2_min_pu_size = s->sps->log2_min_pu_size;
1073
1074     int min_pu_width     = s->sps->min_pu_width;
1075     int x_end = FFMIN(x0 + cb_size, s->sps->width);
1076     int y_end = FFMIN(y0 + cb_size, s->sps->height);
1077     int i, j;
1078
1079     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1080         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1081             s->is_pcm[i + j * min_pu_width] = 2;
1082 }
1083
/**
 * Recursively parse one node of the transform tree.
 *
 * The node is either split into four half-size children (each parsed by a
 * recursive call) or handed to hls_transform_unit() as a leaf. Chroma coded
 * block flags are tracked per depth in lc->tt.cbf_cb / lc->tt.cbf_cr.
 *
 * @param s               HEVC decoding context
 * @param x0              horizontal position of this node
 * @param y0              vertical position of this node
 * @param xBase           horizontal position of the parent node
 * @param yBase           vertical position of the parent node
 * @param cb_xBase        horizontal position passed through unchanged to hls_transform_unit()
 * @param cb_yBase        vertical position passed through unchanged to hls_transform_unit()
 * @param log2_cb_size    log2 size passed through unchanged to hls_transform_unit()
 * @param log2_trafo_size log2 of the size of this node
 * @param trafo_depth     depth of this node in the tree (0 for the root)
 * @param blk_idx         index (0..3) of this node among its siblings
 * @return 0 on success, a negative error code propagated from a child or
 *         from hls_transform_unit() otherwise
 */
static int hls_transform_tree(HEVCContext *s, int x0, int y0,
                              int xBase, int yBase, int cb_xBase, int cb_yBase,
                              int log2_cb_size, int log2_trafo_size,
                              int trafo_depth, int blk_idx)
{
    HEVCLocalContext *lc = s->HEVClc;
    uint8_t split_transform_flag;
    int ret;

    /* Initialise this depth's chroma cbf entries: size-4 nodes below the root
     * inherit them from the parent, all other nodes start at 0. */
    if (trafo_depth > 0 && log2_trafo_size == 2) {
        SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
            SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase);
        SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
            SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase);
        if (s->sps->chroma_format_idc == 2) {
            /* Second (lower) chroma block, present for chroma_format_idc == 2. */
            int xBase_cb = xBase & ((1 << log2_trafo_size) - 1);
            int yBase_cb = yBase & ((1 << log2_trafo_size) - 1);
            SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1))) =
                SAMPLE_CBF2(lc->tt.cbf_cb[trafo_depth - 1], xBase_cb, yBase_cb + (1 << (log2_trafo_size)));
            SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1))) =
                SAMPLE_CBF2(lc->tt.cbf_cr[trafo_depth - 1], xBase_cb, yBase_cb + (1 << (log2_trafo_size)));
        }
    } else {
        SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
        SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) = 0;
        if (s->sps->chroma_format_idc == 2) {
            SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1))) =
            SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1))) = 0;
        }
    }

    /* Select the intra prediction modes used by the transform unit. With an
     * intra split they are picked per block, and only at depth 1; deeper
     * nodes keep the values set there. */
    if (lc->cu.intra_split_flag) {
        if (trafo_depth == 1) {
            lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[blk_idx];
            if (s->sps->chroma_format_idc == 3) {
                lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
                lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[blk_idx];
            } else {
                lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
                lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
            }
        }
    } else {
        lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[0];
        lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
        lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
    }

    /* Default; only overwritten below when cbf_luma is actually decoded. */
    lc->tt.cbf_luma = 1;

    lc->tt.inter_split_flag = s->sps->max_transform_hierarchy_depth_inter == 0 &&
                              lc->cu.pred_mode == MODE_INTER &&
                              lc->cu.part_mode != PART_2Nx2N &&
                              trafo_depth == 0;

    /* split_transform_flag is read from the bitstream only when both
     * outcomes are possible; otherwise it is inferred. */
    if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
        log2_trafo_size >  s->sps->log2_min_tb_size    &&
        trafo_depth     < lc->cu.max_trafo_depth       &&
        !(lc->cu.intra_split_flag && trafo_depth == 0)) {
        split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
    } else {
        split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
                               (lc->cu.intra_split_flag && trafo_depth == 0) ||
                               lc->tt.inter_split_flag;
    }

    /* Chroma cbf flags are decoded at the root, or when the parent's flag at
     * (xBase, yBase) is set. Order matters: all Cb flags precede the Cr flags. */
    if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
        if (trafo_depth == 0 ||
            SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase)) {
            SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
                ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
            if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
                SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 +  (1  <<  (log2_trafo_size - 1))) =
                    ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
            }
        }

        if (trafo_depth == 0 ||
            SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase)) {
            SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
                ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
            if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
                SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 +  (1  <<  (log2_trafo_size - 1))) =
                    ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
            }
        }
    }

    if (split_transform_flag) {
        /* Recurse into the four quadrants; this node's origin (x0, y0)
         * becomes xBase/yBase of every child. */
        int x1 = x0 + ((1 << log2_trafo_size) >> 1);
        int y1 = y0 + ((1 << log2_trafo_size) >> 1);

        ret = hls_transform_tree(s, x0, y0, x0, y0, cb_xBase, cb_yBase,
                                 log2_cb_size, log2_trafo_size - 1,
                                 trafo_depth + 1, 0);
        if (ret < 0)
            return ret;
        ret = hls_transform_tree(s, x1, y0, x0, y0, cb_xBase, cb_yBase,
                                 log2_cb_size, log2_trafo_size - 1,
                                 trafo_depth + 1, 1);
        if (ret < 0)
            return ret;
        ret = hls_transform_tree(s, x0, y1, x0, y0, cb_xBase, cb_yBase,
                                 log2_cb_size, log2_trafo_size - 1,
                                 trafo_depth + 1, 2);
        if (ret < 0)
            return ret;
        ret = hls_transform_tree(s, x1, y1, x0, y0, cb_xBase, cb_yBase,
                                 log2_cb_size, log2_trafo_size - 1,
                                 trafo_depth + 1, 3);
        if (ret < 0)
            return ret;
    } else {
        int min_tu_size      = 1 << s->sps->log2_min_tb_size;
        int log2_min_tu_size = s->sps->log2_min_tb_size;
        int min_tu_width     = s->sps->min_tb_width;

        /* cbf_luma is decoded unless this is an inter root node with no
         * chroma coefficients; in that case it keeps the default of 1. */
        if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
            SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
            SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) ||
            (s->sps->chroma_format_idc == 2 &&
             (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 +  (1  <<  (log2_trafo_size - 1))) ||
              SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 +  (1  <<  (log2_trafo_size - 1)))))) {
            lc->tt.cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
        }

        ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
                                 log2_cb_size, log2_trafo_size, trafo_depth,
                                 blk_idx);
        if (ret < 0)
            return ret;
        // TODO: store cbf_luma somewhere else
        if (lc->tt.cbf_luma) {
            /* Record cbf_luma in the picture-wide map, one entry per
             * minimum-size transform block covered by this unit. */
            int i, j;
            for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
                for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
                    int x_tu = (x0 + j) >> log2_min_tu_size;
                    int y_tu = (y0 + i) >> log2_min_tu_size;
                    s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
                }
        }
        if (!s->sh.disable_deblocking_filter_flag) {
            ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
            if (s->pps->transquant_bypass_enable_flag &&
                lc->cu.cu_transquant_bypass_flag)
                set_deblocking_bypass(s, x0, y0, log2_trafo_size);
        }
    }
    return 0;
}
1234
1235 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1236 {
1237     //TODO: non-4:2:0 support
1238     HEVCLocalContext *lc = s->HEVClc;
1239     GetBitContext gb;
1240     int cb_size   = 1 << log2_cb_size;
1241     int stride0   = s->frame->linesize[0];
1242     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1243     int   stride1 = s->frame->linesize[1];
1244     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1245     int   stride2 = s->frame->linesize[2];
1246     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1247
1248     int length         = cb_size * cb_size * s->sps->pcm.bit_depth +
1249                          (((cb_size >> s->sps->hshift[1]) * (cb_size >> s->sps->vshift[1])) +
1250                           ((cb_size >> s->sps->hshift[2]) * (cb_size >> s->sps->vshift[2]))) *
1251                           s->sps->pcm.bit_depth_chroma;
1252     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1253     int ret;
1254
1255     if (!s->sh.disable_deblocking_filter_flag)
1256         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1257
1258     ret = init_get_bits(&gb, pcm, length);
1259     if (ret < 0)
1260         return ret;
1261
1262     s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size,     &gb, s->sps->pcm.bit_depth);
1263     s->hevcdsp.put_pcm(dst1, stride1,
1264                        cb_size >> s->sps->hshift[1],
1265                        cb_size >> s->sps->vshift[1],
1266                        &gb, s->sps->pcm.bit_depth_chroma);
1267     s->hevcdsp.put_pcm(dst2, stride2,
1268                        cb_size >> s->sps->hshift[2],
1269                        cb_size >> s->sps->vshift[2],
1270                        &gb, s->sps->pcm.bit_depth_chroma);
1271     return 0;
1272 }
1273
1274 /**
1275  * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1276  *
1277  * @param s HEVC decoding context
1278  * @param dst target buffer for block data at block position
1279  * @param dststride stride of the dst buffer
1280  * @param ref reference picture buffer at origin (0, 0)
1281  * @param mv motion vector (relative to block position) to get pixel data from
1282  * @param x_off horizontal position of block from origin (0, 0)
1283  * @param y_off vertical position of block from origin (0, 0)
1284  * @param block_w width of block
1285  * @param block_h height of block
1286  * @param luma_weight weighting factor applied to the luma prediction
1287  * @param luma_offset additive offset applied to the luma prediction value
1288  */
1289
1290 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1291                         AVFrame *ref, const Mv *mv, int x_off, int y_off,
1292                         int block_w, int block_h, int luma_weight, int luma_offset)
1293 {
1294     HEVCLocalContext *lc = s->HEVClc;
1295     uint8_t *src         = ref->data[0];
1296     ptrdiff_t srcstride  = ref->linesize[0];
1297     int pic_width        = s->sps->width;
1298     int pic_height       = s->sps->height;
1299     int mx               = mv->x & 3;
1300     int my               = mv->y & 3;
1301     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1302                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1303     int idx              = ff_hevc_pel_weight[block_w];
1304
1305     x_off += mv->x >> 2;
1306     y_off += mv->y >> 2;
1307     src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
1308
1309     if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1310         x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1311         y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1312         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1313         int offset     = QPEL_EXTRA_BEFORE * srcstride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1314         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1315
1316         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1317                                  edge_emu_stride, srcstride,
1318                                  block_w + QPEL_EXTRA,
1319                                  block_h + QPEL_EXTRA,
1320                                  x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1321                                  pic_width, pic_height);
1322         src = lc->edge_emu_buffer + buf_offset;
1323         srcstride = edge_emu_stride;
1324     }
1325
1326     if (!weight_flag)
1327         s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1328                                                       block_h, mx, my, block_w);
1329     else
1330         s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1331                                                         block_h, s->sh.luma_log2_weight_denom,
1332                                                         luma_weight, luma_offset, mx, my, block_w);
1333 }
1334
1335 /**
1336  * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1337  *
1338  * @param s HEVC decoding context
1339  * @param dst target buffer for block data at block position
1340  * @param dststride stride of the dst buffer
1341  * @param ref0 reference picture0 buffer at origin (0, 0)
1342  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1343  * @param x_off horizontal position of block from origin (0, 0)
1344  * @param y_off vertical position of block from origin (0, 0)
1345  * @param block_w width of block
1346  * @param block_h height of block
1347  * @param ref1 reference picture1 buffer at origin (0, 0)
1348  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1349  * @param current_mv current motion vector structure
1350  */
1351  static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1352                        AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1353                        int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1354 {
1355     HEVCLocalContext *lc = s->HEVClc;
1356     DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1357     ptrdiff_t src0stride  = ref0->linesize[0];
1358     ptrdiff_t src1stride  = ref1->linesize[0];
1359     int pic_width        = s->sps->width;
1360     int pic_height       = s->sps->height;
1361     int mx0              = mv0->x & 3;
1362     int my0              = mv0->y & 3;
1363     int mx1              = mv1->x & 3;
1364     int my1              = mv1->y & 3;
1365     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1366                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1367     int x_off0           = x_off + (mv0->x >> 2);
1368     int y_off0           = y_off + (mv0->y >> 2);
1369     int x_off1           = x_off + (mv1->x >> 2);
1370     int y_off1           = y_off + (mv1->y >> 2);
1371     int idx              = ff_hevc_pel_weight[block_w];
1372
1373     uint8_t *src0  = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
1374     uint8_t *src1  = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
1375
1376     if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1377         x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1378         y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1379         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1380         int offset     = QPEL_EXTRA_BEFORE * src0stride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1381         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1382
1383         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1384                                  edge_emu_stride, src0stride,
1385                                  block_w + QPEL_EXTRA,
1386                                  block_h + QPEL_EXTRA,
1387                                  x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1388                                  pic_width, pic_height);
1389         src0 = lc->edge_emu_buffer + buf_offset;
1390         src0stride = edge_emu_stride;
1391     }
1392
1393     if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1394         x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1395         y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1396         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1397         int offset     = QPEL_EXTRA_BEFORE * src1stride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1398         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1399
1400         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1401                                  edge_emu_stride, src1stride,
1402                                  block_w + QPEL_EXTRA,
1403                                  block_h + QPEL_EXTRA,
1404                                  x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1405                                  pic_width, pic_height);
1406         src1 = lc->edge_emu_buffer2 + buf_offset;
1407         src1stride = edge_emu_stride;
1408     }
1409
1410     s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](tmp, MAX_PB_SIZE, src0, src0stride,
1411                                                 block_h, mx0, my0, block_w);
1412     if (!weight_flag)
1413         s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, tmp, MAX_PB_SIZE,
1414                                                        block_h, mx1, my1, block_w);
1415     else
1416         s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, tmp, MAX_PB_SIZE,
1417                                                          block_h, s->sh.luma_log2_weight_denom,
1418                                                          s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1419                                                          s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1420                                                          s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1421                                                          s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1422                                                          mx1, my1, block_w);
1423
1424 }
1425
1426 /**
1427  * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1428  *
1429  * @param s HEVC decoding context
1430  * @param dst1 target buffer for block data at block position (U plane)
1431  * @param dst2 target buffer for block data at block position (V plane)
1432  * @param dststride stride of the dst1 and dst2 buffers
1433  * @param ref reference picture buffer at origin (0, 0)
1434  * @param mv motion vector (relative to block position) to get pixel data from
1435  * @param x_off horizontal position of block from origin (0, 0)
1436  * @param y_off vertical position of block from origin (0, 0)
1437  * @param block_w width of block
1438  * @param block_h height of block
1439  * @param chroma_weight weighting factor applied to the chroma prediction
1440  * @param chroma_offset additive offset applied to the chroma prediction value
1441  */
1442
1443 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1444                           ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1445                           int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1446 {
1447     HEVCLocalContext *lc = s->HEVClc;
1448     int pic_width        = s->sps->width >> s->sps->hshift[1];
1449     int pic_height       = s->sps->height >> s->sps->vshift[1];
1450     const Mv *mv         = &current_mv->mv[reflist];
1451     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1452                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1453     int idx              = ff_hevc_pel_weight[block_w];
1454     int hshift           = s->sps->hshift[1];
1455     int vshift           = s->sps->vshift[1];
1456     intptr_t mx          = mv->x & ((1 << (2 + hshift)) - 1);
1457     intptr_t my          = mv->y & ((1 << (2 + vshift)) - 1);
1458     intptr_t _mx         = mx << (1 - hshift);
1459     intptr_t _my         = my << (1 - vshift);
1460
1461     x_off += mv->x >> (2 + hshift);
1462     y_off += mv->y >> (2 + vshift);
1463     src0  += y_off * srcstride + (x_off << s->sps->pixel_shift);
1464
1465     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1466         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1467         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1468         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1469         int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->sps->pixel_shift));
1470         int buf_offset0 = EPEL_EXTRA_BEFORE *
1471                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1472         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1473                                  edge_emu_stride, srcstride,
1474                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1475                                  x_off - EPEL_EXTRA_BEFORE,
1476                                  y_off - EPEL_EXTRA_BEFORE,
1477                                  pic_width, pic_height);
1478
1479         src0 = lc->edge_emu_buffer + buf_offset0;
1480         srcstride = edge_emu_stride;
1481     }
1482     if (!weight_flag)
1483         s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1484                                                   block_h, _mx, _my, block_w);
1485     else
1486         s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1487                                                         block_h, s->sh.chroma_log2_weight_denom,
1488                                                         chroma_weight, chroma_offset, _mx, _my, block_w);
1489 }
1490
1491 /**
1492  * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1493  *
1494  * @param s HEVC decoding context
1495  * @param dst target buffer for block data at block position
1496  * @param dststride stride of the dst buffer
1497  * @param ref0 reference picture0 buffer at origin (0, 0)
1498  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1499  * @param x_off horizontal position of block from origin (0, 0)
1500  * @param y_off vertical position of block from origin (0, 0)
1501  * @param block_w width of block
1502  * @param block_h height of block
1503  * @param ref1 reference picture1 buffer at origin (0, 0)
1504  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1505  * @param current_mv current motion vector structure
1506  * @param cidx chroma component(cb, cr)
1507  */
1508 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1509                          int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1510 {
1511     DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1512     int tmpstride = MAX_PB_SIZE;
1513     HEVCLocalContext *lc = s->HEVClc;
1514     uint8_t *src1        = ref0->data[cidx+1];
1515     uint8_t *src2        = ref1->data[cidx+1];
1516     ptrdiff_t src1stride = ref0->linesize[cidx+1];
1517     ptrdiff_t src2stride = ref1->linesize[cidx+1];
1518     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1519                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1520     int pic_width        = s->sps->width >> s->sps->hshift[1];
1521     int pic_height       = s->sps->height >> s->sps->vshift[1];
1522     Mv *mv0              = &current_mv->mv[0];
1523     Mv *mv1              = &current_mv->mv[1];
1524     int hshift = s->sps->hshift[1];
1525     int vshift = s->sps->vshift[1];
1526
1527     intptr_t mx0 = mv0->x & ((1 << (2 + hshift)) - 1);
1528     intptr_t my0 = mv0->y & ((1 << (2 + vshift)) - 1);
1529     intptr_t mx1 = mv1->x & ((1 << (2 + hshift)) - 1);
1530     intptr_t my1 = mv1->y & ((1 << (2 + vshift)) - 1);
1531     intptr_t _mx0 = mx0 << (1 - hshift);
1532     intptr_t _my0 = my0 << (1 - vshift);
1533     intptr_t _mx1 = mx1 << (1 - hshift);
1534     intptr_t _my1 = my1 << (1 - vshift);
1535
1536     int x_off0 = x_off + (mv0->x >> (2 + hshift));
1537     int y_off0 = y_off + (mv0->y >> (2 + vshift));
1538     int x_off1 = x_off + (mv1->x >> (2 + hshift));
1539     int y_off1 = y_off + (mv1->y >> (2 + vshift));
1540     int idx = ff_hevc_pel_weight[block_w];
1541     src1  += y_off0 * src1stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
1542     src2  += y_off1 * src2stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
1543
1544     if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1545         x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1546         y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1547         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1548         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1549         int buf_offset1 = EPEL_EXTRA_BEFORE *
1550                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1551
1552         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1553                                  edge_emu_stride, src1stride,
1554                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1555                                  x_off0 - EPEL_EXTRA_BEFORE,
1556                                  y_off0 - EPEL_EXTRA_BEFORE,
1557                                  pic_width, pic_height);
1558
1559         src1 = lc->edge_emu_buffer + buf_offset1;
1560         src1stride = edge_emu_stride;
1561     }
1562
1563     if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1564         x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1565         y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1566         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1567         int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1568         int buf_offset1 = EPEL_EXTRA_BEFORE *
1569                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1570
1571         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1572                                  edge_emu_stride, src2stride,
1573                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1574                                  x_off1 - EPEL_EXTRA_BEFORE,
1575                                  y_off1 - EPEL_EXTRA_BEFORE,
1576                                  pic_width, pic_height);
1577
1578         src2 = lc->edge_emu_buffer2 + buf_offset1;
1579         src2stride = edge_emu_stride;
1580     }
1581
1582     s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](tmp, tmpstride, src1, src1stride,
1583                                                 block_h, _mx0, _my0, block_w);
1584     if (!weight_flag)
1585         s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1586                                                        src2, src2stride, tmp, tmpstride,
1587                                                        block_h, _mx1, _my1, block_w);
1588     else
1589         s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1590                                                          src2, src2stride, tmp, tmpstride,
1591                                                          block_h,
1592                                                          s->sh.chroma_log2_weight_denom,
1593                                                          s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1594                                                          s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1595                                                          s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1596                                                          s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1597                                                          _mx1, _my1, block_w);
1598 }
1599
1600 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1601                                 const Mv *mv, int y0, int height)
1602 {
1603     int y = (mv->y >> 2) + y0 + height + 9;
1604
1605     if (s->threads_type == FF_THREAD_FRAME )
1606         ff_thread_await_progress(&ref->tf, y, 0);
1607 }
1608
1609 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1610                                 int nPbW, int nPbH,
1611                                 int log2_cb_size, int partIdx, int idx)
1612 {
1613 #define POS(c_idx, x, y)                                                              \
1614     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1615                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1616     HEVCLocalContext *lc = s->HEVClc;
1617     int merge_idx = 0;
1618     struct MvField current_mv = {{{ 0 }}};
1619
1620     int min_pu_width = s->sps->min_pu_width;
1621
1622     MvField *tab_mvf = s->ref->tab_mvf;
1623     RefPicList  *refPicList = s->ref->refPicList;
1624     HEVCFrame *ref0, *ref1;
1625     uint8_t *dst0 = POS(0, x0, y0);
1626     uint8_t *dst1 = POS(1, x0, y0);
1627     uint8_t *dst2 = POS(2, x0, y0);
1628     int log2_min_cb_size = s->sps->log2_min_cb_size;
1629     int min_cb_width     = s->sps->min_cb_width;
1630     int x_cb             = x0 >> log2_min_cb_size;
1631     int y_cb             = y0 >> log2_min_cb_size;
1632     int ref_idx[2];
1633     int mvp_flag[2];
1634     int x_pu, y_pu;
1635     int i, j;
1636
1637     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1638         if (s->sh.max_num_merge_cand > 1)
1639             merge_idx = ff_hevc_merge_idx_decode(s);
1640         else
1641             merge_idx = 0;
1642
1643         ff_hevc_luma_mv_merge_mode(s, x0, y0,
1644                                    1 << log2_cb_size,
1645                                    1 << log2_cb_size,
1646                                    log2_cb_size, partIdx,
1647                                    merge_idx, &current_mv);
1648         x_pu = x0 >> s->sps->log2_min_pu_size;
1649         y_pu = y0 >> s->sps->log2_min_pu_size;
1650
1651         for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1652             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1653                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1654     } else { /* MODE_INTER */
1655         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1656         if (lc->pu.merge_flag) {
1657             if (s->sh.max_num_merge_cand > 1)
1658                 merge_idx = ff_hevc_merge_idx_decode(s);
1659             else
1660                 merge_idx = 0;
1661
1662             ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1663                                        partIdx, merge_idx, &current_mv);
1664             x_pu = x0 >> s->sps->log2_min_pu_size;
1665             y_pu = y0 >> s->sps->log2_min_pu_size;
1666
1667             for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1668                 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1669                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1670         } else {
1671             enum InterPredIdc inter_pred_idc = PRED_L0;
1672             ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1673             current_mv.pred_flag = 0;
1674             if (s->sh.slice_type == B_SLICE)
1675                 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1676
1677             if (inter_pred_idc != PRED_L1) {
1678                 if (s->sh.nb_refs[L0]) {
1679                     ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1680                     current_mv.ref_idx[0] = ref_idx[0];
1681                 }
1682                 current_mv.pred_flag = PF_L0;
1683                 ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1684                 mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
1685                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1686                                          partIdx, merge_idx, &current_mv,
1687                                          mvp_flag[0], 0);
1688                 current_mv.mv[0].x += lc->pu.mvd.x;
1689                 current_mv.mv[0].y += lc->pu.mvd.y;
1690             }
1691
1692             if (inter_pred_idc != PRED_L0) {
1693                 if (s->sh.nb_refs[L1]) {
1694                     ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1695                     current_mv.ref_idx[1] = ref_idx[1];
1696                 }
1697
1698                 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1699                     lc->pu.mvd.x = 0;
1700                     lc->pu.mvd.y = 0;
1701                 } else {
1702                     ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1703                 }
1704
1705                 current_mv.pred_flag += PF_L1;
1706                 mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
1707                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1708                                          partIdx, merge_idx, &current_mv,
1709                                          mvp_flag[1], 1);
1710                 current_mv.mv[1].x += lc->pu.mvd.x;
1711                 current_mv.mv[1].y += lc->pu.mvd.y;
1712             }
1713
1714             x_pu = x0 >> s->sps->log2_min_pu_size;
1715             y_pu = y0 >> s->sps->log2_min_pu_size;
1716
1717             for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1718                 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1719                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1720         }
1721     }
1722
1723     if (current_mv.pred_flag & PF_L0) {
1724         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1725         if (!ref0)
1726             return;
1727         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1728     }
1729     if (current_mv.pred_flag & PF_L1) {
1730         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1731         if (!ref1)
1732             return;
1733         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1734     }
1735
1736     if (current_mv.pred_flag == PF_L0) {
1737         int x0_c = x0 >> s->sps->hshift[1];
1738         int y0_c = y0 >> s->sps->vshift[1];
1739         int nPbW_c = nPbW >> s->sps->hshift[1];
1740         int nPbH_c = nPbH >> s->sps->vshift[1];
1741
1742         luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1743                     &current_mv.mv[0], x0, y0, nPbW, nPbH,
1744                     s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1745                     s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1746
1747         chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1748                       0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1749                       s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1750         chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1751                       0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1752                       s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1753     } else if (current_mv.pred_flag == PF_L1) {
1754         int x0_c = x0 >> s->sps->hshift[1];
1755         int y0_c = y0 >> s->sps->vshift[1];
1756         int nPbW_c = nPbW >> s->sps->hshift[1];
1757         int nPbH_c = nPbH >> s->sps->vshift[1];
1758
1759         luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1760                     &current_mv.mv[1], x0, y0, nPbW, nPbH,
1761                     s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1762                     s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1763
1764         chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1765                       1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1766                       s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1767
1768         chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1769                       1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1770                       s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1771     } else if (current_mv.pred_flag == PF_BI) {
1772         int x0_c = x0 >> s->sps->hshift[1];
1773         int y0_c = y0 >> s->sps->vshift[1];
1774         int nPbW_c = nPbW >> s->sps->hshift[1];
1775         int nPbH_c = nPbH >> s->sps->vshift[1];
1776
1777         luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1778                    &current_mv.mv[0], x0, y0, nPbW, nPbH,
1779                    ref1->frame, &current_mv.mv[1], &current_mv);
1780
1781         chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1782                      x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0);
1783
1784         chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1785                      x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 1);
1786     }
1787 }
1788
1789 /**
1790  * 8.4.1
1791  */
1792 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1793                                 int prev_intra_luma_pred_flag)
1794 {
1795     HEVCLocalContext *lc = s->HEVClc;
1796     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1797     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1798     int min_pu_width     = s->sps->min_pu_width;
1799     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1800     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1801     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1802
1803     int cand_up   = (lc->ctb_up_flag || y0b) ?
1804                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1805     int cand_left = (lc->ctb_left_flag || x0b) ?
1806                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1807
1808     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1809
1810     MvField *tab_mvf = s->ref->tab_mvf;
1811     int intra_pred_mode;
1812     int candidate[3];
1813     int i, j;
1814
1815     // intra_pred_mode prediction does not cross vertical CTB boundaries
1816     if ((y0 - 1) < y_ctb)
1817         cand_up = INTRA_DC;
1818
1819     if (cand_left == cand_up) {
1820         if (cand_left < 2) {
1821             candidate[0] = INTRA_PLANAR;
1822             candidate[1] = INTRA_DC;
1823             candidate[2] = INTRA_ANGULAR_26;
1824         } else {
1825             candidate[0] = cand_left;
1826             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1827             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1828         }
1829     } else {
1830         candidate[0] = cand_left;
1831         candidate[1] = cand_up;
1832         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1833             candidate[2] = INTRA_PLANAR;
1834         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1835             candidate[2] = INTRA_DC;
1836         } else {
1837             candidate[2] = INTRA_ANGULAR_26;
1838         }
1839     }
1840
1841     if (prev_intra_luma_pred_flag) {
1842         intra_pred_mode = candidate[lc->pu.mpm_idx];
1843     } else {
1844         if (candidate[0] > candidate[1])
1845             FFSWAP(uint8_t, candidate[0], candidate[1]);
1846         if (candidate[0] > candidate[2])
1847             FFSWAP(uint8_t, candidate[0], candidate[2]);
1848         if (candidate[1] > candidate[2])
1849             FFSWAP(uint8_t, candidate[1], candidate[2]);
1850
1851         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1852         for (i = 0; i < 3; i++)
1853             if (intra_pred_mode >= candidate[i])
1854                 intra_pred_mode++;
1855     }
1856
1857     /* write the intra prediction units into the mv array */
1858     if (!size_in_pus)
1859         size_in_pus = 1;
1860     for (i = 0; i < size_in_pus; i++) {
1861         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1862                intra_pred_mode, size_in_pus);
1863
1864         for (j = 0; j < size_in_pus; j++) {
1865             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
1866         }
1867     }
1868
1869     return intra_pred_mode;
1870 }
1871
1872 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1873                                           int log2_cb_size, int ct_depth)
1874 {
1875     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1876     int x_cb   = x0 >> s->sps->log2_min_cb_size;
1877     int y_cb   = y0 >> s->sps->log2_min_cb_size;
1878     int y;
1879
1880     for (y = 0; y < length; y++)
1881         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
1882                ct_depth, length);
1883 }
1884
/* Remapping applied by intra_prediction_unit() to the derived chroma intra
 * mode (index 0..34) when chroma_format_idc == 2. */
static const uint8_t tab_mode_idx[] = {
    /*  0 ..  9 */  0,  1,  2,  2,  2,  2,  3,  5,  7,  8,
    /* 10 .. 19 */ 10, 12, 13, 15, 17, 18, 19, 20, 21, 22,
    /* 20 .. 29 */ 23, 23, 24, 24, 25, 25, 26, 27, 27, 28,
    /* 30 .. 34 */ 28, 29, 29, 30, 31
};
1888
1889 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1890                                   int log2_cb_size)
1891 {
1892     HEVCLocalContext *lc = s->HEVClc;
1893     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1894     uint8_t prev_intra_luma_pred_flag[4];
1895     int split   = lc->cu.part_mode == PART_NxN;
1896     int pb_size = (1 << log2_cb_size) >> split;
1897     int side    = split + 1;
1898     int chroma_mode;
1899     int i, j;
1900
1901     for (i = 0; i < side; i++)
1902         for (j = 0; j < side; j++)
1903             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1904
1905     for (i = 0; i < side; i++) {
1906         for (j = 0; j < side; j++) {
1907             if (prev_intra_luma_pred_flag[2 * i + j])
1908                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1909             else
1910                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1911
1912             lc->pu.intra_pred_mode[2 * i + j] =
1913                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1914                                      prev_intra_luma_pred_flag[2 * i + j]);
1915         }
1916     }
1917
1918     if (s->sps->chroma_format_idc == 3) {
1919         for (i = 0; i < side; i++) {
1920             for (j = 0; j < side; j++) {
1921                 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1922                 if (chroma_mode != 4) {
1923                     if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
1924                         lc->pu.intra_pred_mode_c[2 * i + j] = 34;
1925                     else
1926                         lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
1927                 } else {
1928                     lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
1929                 }
1930             }
1931         }
1932     } else if (s->sps->chroma_format_idc == 2) {
1933         int mode_idx;
1934         lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1935         if (chroma_mode != 4) {
1936             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1937                 mode_idx = 34;
1938             else
1939                 mode_idx = intra_chroma_table[chroma_mode];
1940         } else {
1941             mode_idx = lc->pu.intra_pred_mode[0];
1942         }
1943         lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
1944     } else if (s->sps->chroma_format_idc != 0) {
1945         chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1946         if (chroma_mode != 4) {
1947             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1948                 lc->pu.intra_pred_mode_c[0] = 34;
1949             else
1950                 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
1951         } else {
1952             lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
1953         }
1954     }
1955 }
1956
1957 static void intra_prediction_unit_default_value(HEVCContext *s,
1958                                                 int x0, int y0,
1959                                                 int log2_cb_size)
1960 {
1961     HEVCLocalContext *lc = s->HEVClc;
1962     int pb_size          = 1 << log2_cb_size;
1963     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
1964     int min_pu_width     = s->sps->min_pu_width;
1965     MvField *tab_mvf     = s->ref->tab_mvf;
1966     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1967     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1968     int j, k;
1969
1970     if (size_in_pus == 0)
1971         size_in_pus = 1;
1972     for (j = 0; j < size_in_pus; j++)
1973         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
1974     if (lc->cu.pred_mode == MODE_INTRA)
1975         for (j = 0; j < size_in_pus; j++)
1976             for (k = 0; k < size_in_pus; k++)
1977                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
1978 }
1979
1980 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
1981 {
1982     int cb_size          = 1 << log2_cb_size;
1983     HEVCLocalContext *lc = s->HEVClc;
1984     int log2_min_cb_size = s->sps->log2_min_cb_size;
1985     int length           = cb_size >> log2_min_cb_size;
1986     int min_cb_width     = s->sps->min_cb_width;
1987     int x_cb             = x0 >> log2_min_cb_size;
1988     int y_cb             = y0 >> log2_min_cb_size;
1989     int idx              = log2_cb_size - 2;
1990     int qp_block_mask    = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
1991     int x, y, ret;
1992
1993     lc->cu.x                = x0;
1994     lc->cu.y                = y0;
1995     lc->cu.rqt_root_cbf     = 1;
1996     lc->cu.pred_mode        = MODE_INTRA;
1997     lc->cu.part_mode        = PART_2Nx2N;
1998     lc->cu.intra_split_flag = 0;
1999     lc->cu.pcm_flag         = 0;
2000
2001     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2002     for (x = 0; x < 4; x++)
2003         lc->pu.intra_pred_mode[x] = 1;
2004     if (s->pps->transquant_bypass_enable_flag) {
2005         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2006         if (lc->cu.cu_transquant_bypass_flag)
2007             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2008     } else
2009         lc->cu.cu_transquant_bypass_flag = 0;
2010
2011     if (s->sh.slice_type != I_SLICE) {
2012         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2013
2014         x = y_cb * min_cb_width + x_cb;
2015         for (y = 0; y < length; y++) {
2016             memset(&s->skip_flag[x], skip_flag, length);
2017             x += min_cb_width;
2018         }
2019         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2020     }
2021
2022     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2023         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2024         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2025
2026         if (!s->sh.disable_deblocking_filter_flag)
2027             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2028     } else {
2029         if (s->sh.slice_type != I_SLICE)
2030             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2031         if (lc->cu.pred_mode != MODE_INTRA ||
2032             log2_cb_size == s->sps->log2_min_cb_size) {
2033             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2034             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2035                                       lc->cu.pred_mode == MODE_INTRA;
2036         }
2037
2038         if (lc->cu.pred_mode == MODE_INTRA) {
2039             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2040                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2041                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2042                 lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s);
2043             }
2044             if (lc->cu.pcm_flag) {
2045                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2046                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2047                 if (s->sps->pcm.loop_filter_disable_flag)
2048                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2049
2050                 if (ret < 0)
2051                     return ret;
2052             } else {
2053                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2054             }
2055         } else {
2056             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2057             switch (lc->cu.part_mode) {
2058             case PART_2Nx2N:
2059                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2060                 break;
2061             case PART_2NxN:
2062                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0, idx);
2063                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2064                 break;
2065             case PART_Nx2N:
2066                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2067                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2068                 break;
2069             case PART_2NxnU:
2070                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0, idx);
2071                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2072                 break;
2073             case PART_2NxnD:
2074                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2075                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1, idx);
2076                 break;
2077             case PART_nLx2N:
2078                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0, idx - 2);
2079                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2080                 break;
2081             case PART_nRx2N:
2082                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2083                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1, idx - 2);
2084                 break;
2085             case PART_NxN:
2086                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2087                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2088                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2089                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
2090                 break;
2091             }
2092         }
2093
2094         if (!lc->cu.pcm_flag) {
2095             if (lc->cu.pred_mode != MODE_INTRA &&
2096                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2097                 lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2098             }
2099             if (lc->cu.rqt_root_cbf) {
2100                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2101                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2102                                          s->sps->max_transform_hierarchy_depth_inter;
2103                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2104                                          log2_cb_size,
2105                                          log2_cb_size, 0, 0);
2106                 if (ret < 0)
2107                     return ret;
2108             } else {
2109                 if (!s->sh.disable_deblocking_filter_flag)
2110                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2111             }
2112         }
2113     }
2114
2115     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2116         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2117
2118     x = y_cb * min_cb_width + x_cb;
2119     for (y = 0; y < length; y++) {
2120         memset(&s->qp_y_tab[x], lc->qp_y, length);
2121         x += min_cb_width;
2122     }
2123
2124     if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2125        ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2126         lc->qPy_pred = lc->qp_y;
2127     }
2128
2129     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2130
2131     return 0;
2132 }
2133
2134 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2135                                int log2_cb_size, int cb_depth)
2136 {
2137     HEVCLocalContext *lc = s->HEVClc;
2138     const int cb_size    = 1 << log2_cb_size;
2139     int ret;
2140     int qp_block_mask = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
2141     int split_cu_flag;
2142
2143     lc->ct.depth = cb_depth;
2144     if (x0 + cb_size <= s->sps->width  &&
2145         y0 + cb_size <= s->sps->height &&
2146         log2_cb_size > s->sps->log2_min_cb_size) {
2147         split_cu_flag = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2148     } else {
2149         split_cu_flag = (log2_cb_size > s->sps->log2_min_cb_size);
2150     }
2151     if (s->pps->cu_qp_delta_enabled_flag &&
2152         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2153         lc->tu.is_cu_qp_delta_coded = 0;
2154         lc->tu.cu_qp_delta          = 0;
2155     }
2156
2157     if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2158         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_chroma_qp_offset_depth) {
2159         lc->tu.is_cu_chroma_qp_offset_coded = 0;
2160     }
2161
2162     if (split_cu_flag) {
2163         const int cb_size_split = cb_size >> 1;
2164         const int x1 = x0 + cb_size_split;
2165         const int y1 = y0 + cb_size_split;
2166
2167         int more_data = 0;
2168
2169         more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2170         if (more_data < 0)
2171             return more_data;
2172
2173         if (more_data && x1 < s->sps->width) {
2174             more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2175             if (more_data < 0)
2176                 return more_data;
2177         }
2178         if (more_data && y1 < s->sps->height) {
2179             more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2180             if (more_data < 0)
2181                 return more_data;
2182         }
2183         if (more_data && x1 < s->sps->width &&
2184             y1 < s->sps->height) {
2185             more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
2186             if (more_data < 0)
2187                 return more_data;
2188         }
2189
2190         if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2191             ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2192             lc->qPy_pred = lc->qp_y;
2193
2194         if (more_data)
2195             return ((x1 + cb_size_split) < s->sps->width ||
2196                     (y1 + cb_size_split) < s->sps->height);
2197         else
2198             return 0;
2199     } else {
2200         ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2201         if (ret < 0)
2202             return ret;
2203         if ((!((x0 + cb_size) %
2204                (1 << (s->sps->log2_ctb_size))) ||
2205              (x0 + cb_size >= s->sps->width)) &&
2206             (!((y0 + cb_size) %
2207                (1 << (s->sps->log2_ctb_size))) ||
2208              (y0 + cb_size >= s->sps->height))) {
2209             int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2210             return !end_of_slice_flag;
2211         } else {
2212             return 1;
2213         }
2214     }
2215
2216     return 0;
2217 }
2218
2219 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2220                                  int ctb_addr_ts)
2221 {
2222     HEVCLocalContext *lc  = s->HEVClc;
2223     int ctb_size          = 1 << s->sps->log2_ctb_size;
2224     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2225     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2226
2227     int tile_left_boundary, tile_up_boundary;
2228     int slice_left_boundary, slice_up_boundary;
2229
2230     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2231
2232     if (s->pps->entropy_coding_sync_enabled_flag) {
2233         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2234             lc->first_qp_group = 1;
2235         lc->end_of_tiles_x = s->sps->width;
2236     } else if (s->pps->tiles_enabled_flag) {
2237         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2238             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2239             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2240             lc->first_qp_group   = 1;
2241         }
2242     } else {
2243         lc->end_of_tiles_x = s->sps->width;
2244     }
2245
2246     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2247
2248     if (s->pps->tiles_enabled_flag) {
2249         tile_left_boundary = x_ctb > 0 &&
2250                              s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1]];
2251         slice_left_boundary = x_ctb > 0 &&
2252                               s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1];
2253         tile_up_boundary  = y_ctb > 0 &&
2254                             s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
2255         slice_up_boundary = y_ctb > 0 &&
2256                             s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width];
2257     } else {
2258         tile_left_boundary =
2259         tile_up_boundary   = 0;
2260         slice_left_boundary = ctb_addr_in_slice <= 0;
2261         slice_up_boundary   = ctb_addr_in_slice < s->sps->ctb_width;
2262     }
2263     lc->slice_or_tiles_left_boundary = slice_left_boundary + (tile_left_boundary << 1);
2264     lc->slice_or_tiles_up_boundary   = slice_up_boundary   + (tile_up_boundary   << 1);
2265     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0)                  && !tile_left_boundary);
2266     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && !tile_up_boundary);
2267     lc->ctb_up_right_flag = ((y_ctb > 0)                 && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2268     lc->ctb_up_left_flag  = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2269 }
2270
2271 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2272 {
2273     HEVCContext *s  = avctxt->priv_data;
2274     int ctb_size    = 1 << s->sps->log2_ctb_size;
2275     int more_data   = 1;
2276     int x_ctb       = 0;
2277     int y_ctb       = 0;
2278     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2279
2280     if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2281         av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2282         return AVERROR_INVALIDDATA;
2283     }
2284
2285     if (s->sh.dependent_slice_segment_flag) {
2286         int prev_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2287         if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2288             av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2289             return AVERROR_INVALIDDATA;
2290         }
2291     }
2292
2293     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2294         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2295
2296         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2297         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2298         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2299
2300         ff_hevc_cabac_init(s, ctb_addr_ts);
2301
2302         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2303
2304         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2305         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2306         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2307
2308         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2309         if (more_data < 0) {
2310             s->tab_slice_address[ctb_addr_rs] = -1;
2311             return more_data;
2312         }
2313
2314
2315         ctb_addr_ts++;
2316         ff_hevc_save_states(s, ctb_addr_ts);
2317         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2318     }
2319
2320     if (x_ctb + ctb_size >= s->sps->width &&
2321         y_ctb + ctb_size >= s->sps->height)
2322         ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2323
2324     return ctb_addr_ts;
2325 }
2326
2327 static int hls_slice_data(HEVCContext *s)
2328 {
2329     int arg[2];
2330     int ret[2];
2331
2332     arg[0] = 0;
2333     arg[1] = 1;
2334
2335     s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
2336     return ret[0];
2337 }
2338 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2339 {
2340     HEVCContext *s1  = avctxt->priv_data, *s;
2341     HEVCLocalContext *lc;
2342     int ctb_size    = 1<< s1->sps->log2_ctb_size;
2343     int more_data   = 1;
2344     int *ctb_row_p    = input_ctb_row;
2345     int ctb_row = ctb_row_p[job];
2346     int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->sps->width + ctb_size - 1) >> s1->sps->log2_ctb_size);
2347     int ctb_addr_ts = s1->pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2348     int thread = ctb_row % s1->threads_number;
2349     int ret;
2350
2351     s = s1->sList[self_id];
2352     lc = s->HEVClc;
2353
2354     if(ctb_row) {
2355         ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2356
2357         if (ret < 0)
2358             return ret;
2359         ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2360     }
2361
2362     while(more_data && ctb_addr_ts < s->sps->ctb_size) {
2363         int x_ctb = (ctb_addr_rs % s->sps->ctb_width) << s->sps->log2_ctb_size;
2364         int y_ctb = (ctb_addr_rs / s->sps->ctb_width) << s->sps->log2_ctb_size;
2365
2366         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2367
2368         ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
2369
2370         if (avpriv_atomic_int_get(&s1->wpp_err)){
2371             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2372             return 0;
2373         }
2374
2375         ff_hevc_cabac_init(s, ctb_addr_ts);
2376         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2377         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2378
2379         if (more_data < 0) {
2380             s->tab_slice_address[ctb_addr_rs] = -1;
2381             return more_data;
2382         }
2383
2384         ctb_addr_ts++;
2385
2386         ff_hevc_save_states(s, ctb_addr_ts);
2387         ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2388         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2389
2390         if (!more_data && (x_ctb+ctb_size) < s->sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2391             avpriv_atomic_int_set(&s1->wpp_err,  1);
2392             ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2393             return 0;
2394         }
2395
2396         if ((x_ctb+ctb_size) >= s->sps->width && (y_ctb+ctb_size) >= s->sps->height ) {
2397             ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2398             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2399             return ctb_addr_ts;
2400         }
2401         ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2402         x_ctb+=ctb_size;
2403
2404         if(x_ctb >= s->sps->width) {
2405             break;
2406         }
2407     }
2408     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2409
2410     return 0;
2411 }
2412
2413 static int hls_slice_data_wpp(HEVCContext *s, const uint8_t *nal, int length)
2414 {
2415     HEVCLocalContext *lc = s->HEVClc;
2416     int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2417     int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2418     int offset;
2419     int startheader, cmpt = 0;
2420     int i, j, res = 0;
2421
2422
2423     if (!s->sList[1]) {
2424         ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
2425
2426
2427         for (i = 1; i < s->threads_number; i++) {
2428             s->sList[i] = av_malloc(sizeof(HEVCContext));
2429             memcpy(s->sList[i], s, sizeof(HEVCContext));
2430             s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2431             s->sList[i]->HEVClc = s->HEVClcList[i];
2432         }
2433     }
2434
2435     offset = (lc->gb.index >> 3);
2436
2437     for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < s->skipped_bytes; j++) {
2438         if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2439             startheader--;
2440             cmpt++;
2441         }
2442     }
2443
2444     for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2445         offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2446         for (j = 0, cmpt = 0, startheader = offset
2447              + s->sh.entry_point_offset[i]; j < s->skipped_bytes; j++) {
2448             if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2449                 startheader--;
2450                 cmpt++;
2451             }
2452         }
2453         s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2454         s->sh.offset[i - 1] = offset;
2455
2456     }
2457     if (s->sh.num_entry_point_offsets != 0) {
2458         offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2459         s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2460         s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
2461
2462     }
2463     s->data = nal;
2464
2465     for (i = 1; i < s->threads_number; i++) {
2466         s->sList[i]->HEVClc->first_qp_group = 1;
2467         s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2468         memcpy(s->sList[i], s, sizeof(HEVCContext));
2469         s->sList[i]->HEVClc = s->HEVClcList[i];
2470     }
2471
2472     avpriv_atomic_int_set(&s->wpp_err, 0);
2473     ff_reset_entries(s->avctx);
2474
2475     for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2476         arg[i] = i;
2477         ret[i] = 0;
2478     }
2479
2480     if (s->pps->entropy_coding_sync_enabled_flag)
2481         s->avctx->execute2(s->avctx, (void *) hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2482
2483     for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
2484         res += ret[i];
2485     av_free(ret);
2486     av_free(arg);
2487     return res;
2488 }
2489
2490 /**
2491  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2492  * 0 if the unit should be skipped, 1 otherwise
2493  */
2494 static int hls_nal_unit(HEVCContext *s)
2495 {
2496     GetBitContext *gb = &s->HEVClc->gb;
2497     int nuh_layer_id;
2498
2499     if (get_bits1(gb) != 0)
2500         return AVERROR_INVALIDDATA;
2501
2502     s->nal_unit_type = get_bits(gb, 6);
2503
2504     nuh_layer_id   = get_bits(gb, 6);
2505     s->temporal_id = get_bits(gb, 3) - 1;
2506     if (s->temporal_id < 0)
2507         return AVERROR_INVALIDDATA;
2508
2509     av_log(s->avctx, AV_LOG_DEBUG,
2510            "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2511            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2512
2513     return nuh_layer_id == 0;
2514 }
2515
2516 static int set_side_data(HEVCContext *s)
2517 {
2518     AVFrame *out = s->ref->frame;
2519
2520     if (s->sei_frame_packing_present &&
2521         s->frame_packing_arrangement_type >= 3 &&
2522         s->frame_packing_arrangement_type <= 5 &&
2523         s->content_interpretation_type > 0 &&
2524         s->content_interpretation_type < 3) {
2525         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2526         if (!stereo)
2527             return AVERROR(ENOMEM);
2528
2529         switch (s->frame_packing_arrangement_type) {
2530         case 3:
2531             if (s->quincunx_subsampling)
2532                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2533             else
2534                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2535             break;
2536         case 4:
2537             stereo->type = AV_STEREO3D_TOPBOTTOM;
2538             break;
2539         case 5:
2540             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2541             break;
2542         }
2543
2544         if (s->content_interpretation_type == 2)
2545             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2546     }
2547
2548     if (s->sei_display_orientation_present &&
2549         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2550         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2551         AVFrameSideData *rotation = av_frame_new_side_data(out,
2552                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2553                                                            sizeof(int32_t) * 9);
2554         if (!rotation)
2555             return AVERROR(ENOMEM);
2556
2557         av_display_rotation_set((int32_t *)rotation->data, angle);
2558         av_display_matrix_flip((int32_t *)rotation->data,
2559                                s->sei_vflip, s->sei_hflip);
2560     }
2561
2562     return 0;
2563 }
2564
2565 static int hevc_frame_start(HEVCContext *s)
2566 {
2567     HEVCLocalContext *lc = s->HEVClc;
2568     int pic_size_in_ctb  = ((s->sps->width  >> s->sps->log2_min_cb_size) + 1) *
2569                            ((s->sps->height >> s->sps->log2_min_cb_size) + 1);
2570     int ret;
2571     AVFrame *cur_frame;
2572
2573     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2574     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2575     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2576     memset(s->is_pcm,        0, s->sps->min_pu_width * s->sps->min_pu_height);
2577     memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2578
2579     s->is_decoded        = 0;
2580     s->first_nal_type    = s->nal_unit_type;
2581
2582     if (s->pps->tiles_enabled_flag)
2583         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2584
2585     ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2586                               s->poc);
2587     if (ret < 0)
2588         goto fail;
2589
2590     ret = ff_hevc_frame_rps(s);
2591     if (ret < 0) {
2592         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2593         goto fail;
2594     }
2595
2596     s->ref->frame->key_frame = IS_IRAP(s);
2597
2598     ret = set_side_data(s);
2599     if (ret < 0)
2600         goto fail;
2601
2602     cur_frame = s->sps->sao_enabled ? s->sao_frame : s->frame;
2603     cur_frame->pict_type = 3 - s->sh.slice_type;
2604
2605     av_frame_unref(s->output_frame);
2606     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2607     if (ret < 0)
2608         goto fail;
2609
2610     ff_thread_finish_setup(s->avctx);
2611
2612     return 0;
2613
2614 fail:
2615     if (s->ref && s->threads_type == FF_THREAD_FRAME)
2616         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2617     s->ref = NULL;
2618     return ret;
2619 }
2620
2621 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2622 {
2623     HEVCLocalContext *lc = s->HEVClc;
2624     GetBitContext *gb    = &lc->gb;
2625     int ctb_addr_ts, ret;
2626
2627     ret = init_get_bits8(gb, nal, length);
2628     if (ret < 0)
2629         return ret;
2630
2631     ret = hls_nal_unit(s);
2632     if (ret < 0) {
2633         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2634                s->nal_unit_type);
2635         goto fail;
2636     } else if (!ret)
2637         return 0;
2638
2639     switch (s->nal_unit_type) {
2640     case NAL_VPS:
2641         ret = ff_hevc_decode_nal_vps(s);
2642         if (ret < 0)
2643             goto fail;
2644         break;
2645     case NAL_SPS:
2646         ret = ff_hevc_decode_nal_sps(s);
2647         if (ret < 0)
2648             goto fail;
2649         break;
2650     case NAL_PPS:
2651         ret = ff_hevc_decode_nal_pps(s);
2652         if (ret < 0)
2653             goto fail;
2654         break;
2655     case NAL_SEI_PREFIX:
2656     case NAL_SEI_SUFFIX:
2657         ret = ff_hevc_decode_nal_sei(s);
2658         if (ret < 0)
2659             goto fail;
2660         break;
2661     case NAL_TRAIL_R:
2662     case NAL_TRAIL_N:
2663     case NAL_TSA_N:
2664     case NAL_TSA_R:
2665     case NAL_STSA_N:
2666     case NAL_STSA_R:
2667     case NAL_BLA_W_LP:
2668     case NAL_BLA_W_RADL:
2669     case NAL_BLA_N_LP:
2670     case NAL_IDR_W_RADL:
2671     case NAL_IDR_N_LP:
2672     case NAL_CRA_NUT:
2673     case NAL_RADL_N:
2674     case NAL_RADL_R:
2675     case NAL_RASL_N:
2676     case NAL_RASL_R:
2677         ret = hls_slice_header(s);
2678         if (ret < 0)
2679             return ret;
2680
2681         if (s->max_ra == INT_MAX) {
2682             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2683                 s->max_ra = s->poc;
2684             } else {
2685                 if (IS_IDR(s))
2686                     s->max_ra = INT_MIN;
2687             }
2688         }
2689
2690         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2691             s->poc <= s->max_ra) {
2692             s->is_decoded = 0;
2693             break;
2694         } else {
2695             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2696                 s->max_ra = INT_MIN;
2697         }
2698
2699         if (s->sh.first_slice_in_pic_flag) {
2700             ret = hevc_frame_start(s);
2701             if (ret < 0)
2702                 return ret;
2703         } else if (!s->ref) {
2704             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2705             goto fail;
2706         }
2707
2708         if (s->nal_unit_type != s->first_nal_type) {
2709             av_log(s->avctx, AV_LOG_ERROR,
2710                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2711                    s->first_nal_type, s->nal_unit_type);
2712             return AVERROR_INVALIDDATA;
2713         }
2714
2715         if (!s->sh.dependent_slice_segment_flag &&
2716             s->sh.slice_type != I_SLICE) {
2717             ret = ff_hevc_slice_rpl(s);
2718             if (ret < 0) {
2719                 av_log(s->avctx, AV_LOG_WARNING,
2720                        "Error constructing the reference lists for the current slice.\n");
2721                 goto fail;
2722             }
2723         }
2724
2725         if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2726             ctb_addr_ts = hls_slice_data_wpp(s, nal, length);
2727         else
2728             ctb_addr_ts = hls_slice_data(s);
2729         if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2730             s->is_decoded = 1;
2731         }
2732
2733         if (ctb_addr_ts < 0) {
2734             ret = ctb_addr_ts;
2735             goto fail;
2736         }
2737         break;
2738     case NAL_EOS_NUT:
2739     case NAL_EOB_NUT:
2740         s->seq_decode = (s->seq_decode + 1) & 0xff;
2741         s->max_ra     = INT_MAX;
2742         break;
2743     case NAL_AUD:
2744     case NAL_FD_NUT:
2745         break;
2746     default:
2747         av_log(s->avctx, AV_LOG_INFO,
2748                "Skipping NAL unit %d\n", s->nal_unit_type);
2749     }
2750
2751     return 0;
2752 fail:
2753     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2754         return ret;
2755     return 0;
2756 }
2757
2758 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2759  * between these functions would be nice. */
2760 int ff_hevc_extract_rbsp(HEVCContext *s, const uint8_t *src, int length,
2761                          HEVCNAL *nal)
2762 {
2763     int i, si, di;
2764     uint8_t *dst;
2765
2766     s->skipped_bytes = 0;
2767 #define STARTCODE_TEST                                                  \
2768         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
2769             if (src[i + 2] != 3) {                                      \
2770                 /* startcode, so we must be past the end */             \
2771                 length = i;                                             \
2772             }                                                           \
2773             break;                                                      \
2774         }
2775 #if HAVE_FAST_UNALIGNED
2776 #define FIND_FIRST_ZERO                                                 \
2777         if (i > 0 && !src[i])                                           \
2778             i--;                                                        \
2779         while (src[i])                                                  \
2780             i++
2781 #if HAVE_FAST_64BIT
2782     for (i = 0; i + 1 < length; i += 9) {
2783         if (!((~AV_RN64A(src + i) &
2784                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2785               0x8000800080008080ULL))
2786             continue;
2787         FIND_FIRST_ZERO;
2788         STARTCODE_TEST;
2789         i -= 7;
2790     }
2791 #else
2792     for (i = 0; i + 1 < length; i += 5) {
2793         if (!((~AV_RN32A(src + i) &
2794                (AV_RN32A(src + i) - 0x01000101U)) &
2795               0x80008080U))
2796             continue;
2797         FIND_FIRST_ZERO;
2798         STARTCODE_TEST;
2799         i -= 3;
2800     }
2801 #endif /* HAVE_FAST_64BIT */
2802 #else
2803     for (i = 0; i + 1 < length; i += 2) {
2804         if (src[i])
2805             continue;
2806         if (i > 0 && src[i - 1] == 0)
2807             i--;
2808         STARTCODE_TEST;
2809     }
2810 #endif /* HAVE_FAST_UNALIGNED */
2811
2812     if (i >= length - 1) { // no escaped 0
2813         nal->data = src;
2814         nal->size = length;
2815         return length;
2816     }
2817
2818     av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2819                    length + FF_INPUT_BUFFER_PADDING_SIZE);
2820     if (!nal->rbsp_buffer)
2821         return AVERROR(ENOMEM);
2822
2823     dst = nal->rbsp_buffer;
2824
2825     memcpy(dst, src, i);
2826     si = di = i;
2827     while (si + 2 < length) {
2828         // remove escapes (very rare 1:2^22)
2829         if (src[si + 2] > 3) {
2830             dst[di++] = src[si++];
2831             dst[di++] = src[si++];
2832         } else if (src[si] == 0 && src[si + 1] == 0) {
2833             if (src[si + 2] == 3) { // escape
2834                 dst[di++] = 0;
2835                 dst[di++] = 0;
2836                 si       += 3;
2837
2838                 s->skipped_bytes++;
2839                 if (s->skipped_bytes_pos_size < s->skipped_bytes) {
2840                     s->skipped_bytes_pos_size *= 2;
2841                     av_reallocp_array(&s->skipped_bytes_pos,
2842                             s->skipped_bytes_pos_size,
2843                             sizeof(*s->skipped_bytes_pos));
2844                     if (!s->skipped_bytes_pos)
2845                         return AVERROR(ENOMEM);
2846                 }
2847                 if (s->skipped_bytes_pos)
2848                     s->skipped_bytes_pos[s->skipped_bytes-1] = di - 1;
2849                 continue;
2850             } else // next start code
2851                 goto nsc;
2852         }
2853
2854         dst[di++] = src[si++];
2855     }
2856     while (si < length)
2857         dst[di++] = src[si++];
2858
2859 nsc:
2860     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2861
2862     nal->data = dst;
2863     nal->size = di;
2864     return si;
2865 }
2866
2867 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2868 {
2869     int i, consumed, ret = 0;
2870
2871     s->ref = NULL;
2872     s->last_eos = s->eos;
2873     s->eos = 0;
2874
2875     /* split the input packet into NAL units, so we know the upper bound on the
2876      * number of slices in the frame */
2877     s->nb_nals = 0;
2878     while (length >= 4) {
2879         HEVCNAL *nal;
2880         int extract_length = 0;
2881
2882         if (s->is_nalff) {
2883             int i;
2884             for (i = 0; i < s->nal_length_size; i++)
2885                 extract_length = (extract_length << 8) | buf[i];
2886             buf    += s->nal_length_size;
2887             length -= s->nal_length_size;
2888
2889             if (extract_length > length) {
2890                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2891                 ret = AVERROR_INVALIDDATA;
2892                 goto fail;
2893             }
2894         } else {
2895             /* search start code */
2896             while (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2897                 ++buf;
2898                 --length;
2899                 if (length < 4) {
2900                     av_log(s->avctx, AV_LOG_ERROR, "No start code is found.\n");
2901                     ret = AVERROR_INVALIDDATA;
2902                     goto fail;
2903                 }
2904             }
2905
2906             buf           += 3;
2907             length        -= 3;
2908         }
2909
2910         if (!s->is_nalff)
2911             extract_length = length;
2912
2913         if (s->nals_allocated < s->nb_nals + 1) {
2914             int new_size = s->nals_allocated + 1;
2915             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2916             if (!tmp) {
2917                 ret = AVERROR(ENOMEM);
2918                 goto fail;
2919             }
2920             s->nals = tmp;
2921             memset(s->nals + s->nals_allocated, 0,
2922                    (new_size - s->nals_allocated) * sizeof(*tmp));
2923             av_reallocp_array(&s->skipped_bytes_nal, new_size, sizeof(*s->skipped_bytes_nal));
2924             av_reallocp_array(&s->skipped_bytes_pos_size_nal, new_size, sizeof(*s->skipped_bytes_pos_size_nal));
2925             av_reallocp_array(&s->skipped_bytes_pos_nal, new_size, sizeof(*s->skipped_bytes_pos_nal));
2926             s->skipped_bytes_pos_size_nal[s->nals_allocated] = 1024; // initial buffer size
2927             s->skipped_bytes_pos_nal[s->nals_allocated] = av_malloc_array(s->skipped_bytes_pos_size_nal[s->nals_allocated], sizeof(*s->skipped_bytes_pos));
2928             s->nals_allocated = new_size;
2929         }
2930         s->skipped_bytes_pos_size = s->skipped_bytes_pos_size_nal[s->nb_nals];
2931         s->skipped_bytes_pos = s->skipped_bytes_pos_nal[s->nb_nals];
2932         nal = &s->nals[s->nb_nals];
2933
2934         consumed = ff_hevc_extract_rbsp(s, buf, extract_length, nal);
2935
2936         s->skipped_bytes_nal[s->nb_nals] = s->skipped_bytes;
2937         s->skipped_bytes_pos_size_nal[s->nb_nals] = s->skipped_bytes_pos_size;
2938         s->skipped_bytes_pos_nal[s->nb_nals++] = s->skipped_bytes_pos;
2939
2940
2941         if (consumed < 0) {
2942             ret = consumed;
2943             goto fail;
2944         }
2945
2946         ret = init_get_bits8(&s->HEVClc->gb, nal->data, nal->size);
2947         if (ret < 0)
2948             goto fail;
2949         hls_nal_unit(s);
2950
2951         if (s->nal_unit_type == NAL_EOB_NUT ||
2952             s->nal_unit_type == NAL_EOS_NUT)
2953             s->eos = 1;
2954
2955         buf    += consumed;
2956         length -= consumed;
2957     }
2958
2959     /* parse the NAL units */
2960     for (i = 0; i < s->nb_nals; i++) {
2961         int ret;
2962         s->skipped_bytes = s->skipped_bytes_nal[i];
2963         s->skipped_bytes_pos = s->skipped_bytes_pos_nal[i];
2964
2965         ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2966         if (ret < 0) {
2967             av_log(s->avctx, AV_LOG_WARNING,
2968                    "Error parsing NAL unit #%d.\n", i);
2969             goto fail;
2970         }
2971     }
2972
2973 fail:
2974     if (s->ref && s->threads_type == FF_THREAD_FRAME)
2975         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2976
2977     return ret;
2978 }
2979
/* Log the 16 bytes of an MD5 digest as two hex digits each, with no
 * separator and no trailing newline. */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *cur       = md5;
    const uint8_t *const end = md5 + 16;

    while (cur < end)
        av_log(log_ctx, level, "%02"PRIx8, *cur++);
}
2986
2987 static int verify_md5(HEVCContext *s, AVFrame *frame)
2988 {
2989     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2990     int pixel_shift;
2991     int i, j;
2992
2993     if (!desc)
2994         return AVERROR(EINVAL);
2995
2996     pixel_shift = desc->comp[0].depth_minus1 > 7;
2997
2998     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2999            s->poc);
3000
3001     /* the checksums are LE, so we have to byteswap for >8bpp formats
3002      * on BE arches */
3003 #if HAVE_BIGENDIAN
3004     if (pixel_shift && !s->checksum_buf) {
3005         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
3006                        FFMAX3(frame->linesize[0], frame->linesize[1],
3007                               frame->linesize[2]));
3008         if (!s->checksum_buf)
3009             return AVERROR(ENOMEM);
3010     }
3011 #endif
3012
3013     for (i = 0; frame->data[i]; i++) {
3014         int width  = s->avctx->coded_width;
3015         int height = s->avctx->coded_height;
3016         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
3017         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
3018         uint8_t md5[16];
3019
3020         av_md5_init(s->md5_ctx);
3021         for (j = 0; j < h; j++) {
3022             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
3023 #if HAVE_BIGENDIAN
3024             if (pixel_shift) {
3025                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
3026                                     (const uint16_t *) src, w);
3027                 src = s->checksum_buf;
3028             }
3029 #endif
3030             av_md5_update(s->md5_ctx, src, w << pixel_shift);
3031         }
3032         av_md5_final(s->md5_ctx, md5);
3033
3034         if (!memcmp(md5, s->md5[i], 16)) {
3035             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
3036             print_md5(s->avctx, AV_LOG_DEBUG, md5);
3037             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
3038         } else {
3039             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
3040             print_md5(s->avctx, AV_LOG_ERROR, md5);
3041             av_log   (s->avctx, AV_LOG_ERROR, " != ");
3042             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
3043             av_log   (s->avctx, AV_LOG_ERROR, "\n");
3044             return AVERROR_INVALIDDATA;
3045         }
3046     }
3047
3048     av_log(s->avctx, AV_LOG_DEBUG, "\n");
3049
3050     return 0;
3051 }
3052
3053 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
3054                              AVPacket *avpkt)
3055 {
3056     int ret;
3057     HEVCContext *s = avctx->priv_data;
3058
3059     if (!avpkt->size) {
3060         ret = ff_hevc_output_frame(s, data, 1);
3061         if (ret < 0)
3062             return ret;
3063
3064         *got_output = ret;
3065         return 0;
3066     }
3067
3068     s->ref = NULL;
3069     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
3070     if (ret < 0)
3071         return ret;
3072
3073     /* verify the SEI checksum */
3074     if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
3075         s->is_md5) {
3076         ret = verify_md5(s, s->ref->frame);
3077         if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
3078             ff_hevc_unref_frame(s, s->ref, ~0);
3079             return ret;
3080         }
3081     }
3082     s->is_md5 = 0;
3083
3084     if (s->is_decoded) {
3085         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
3086         s->is_decoded = 0;
3087     }
3088
3089     if (s->output_frame->buf[0]) {
3090         av_frame_move_ref(data, s->output_frame);
3091         *got_output = 1;
3092     }
3093
3094     return avpkt->size;
3095 }
3096
3097 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
3098 {
3099     int ret;
3100
3101     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
3102     if (ret < 0)
3103         return ret;
3104
3105     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
3106     if (!dst->tab_mvf_buf)
3107         goto fail;
3108     dst->tab_mvf = src->tab_mvf;
3109
3110     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
3111     if (!dst->rpl_tab_buf)
3112         goto fail;
3113     dst->rpl_tab = src->rpl_tab;
3114
3115     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
3116     if (!dst->rpl_buf)
3117         goto fail;
3118
3119     dst->poc        = src->poc;
3120     dst->ctb_count  = src->ctb_count;
3121     dst->window     = src->window;
3122     dst->flags      = src->flags;
3123     dst->sequence   = src->sequence;
3124
3125     return 0;
3126 fail:
3127     ff_hevc_unref_frame(s, dst, ~0);
3128     return AVERROR(ENOMEM);
3129 }
3130
3131 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3132 {
3133     HEVCContext       *s = avctx->priv_data;
3134     HEVCLocalContext *lc = s->HEVClc;
3135     int i;
3136
3137     pic_arrays_free(s);
3138
3139     av_freep(&s->md5_ctx);
3140
3141     for(i=0; i < s->nals_allocated; i++) {
3142         av_freep(&s->skipped_bytes_pos_nal[i]);
3143     }
3144     av_freep(&s->skipped_bytes_pos_size_nal);
3145     av_freep(&s->skipped_bytes_nal);
3146     av_freep(&s->skipped_bytes_pos_nal);
3147
3148     av_freep(&s->cabac_state);
3149
3150     av_frame_free(&s->tmp_frame);
3151     av_frame_free(&s->output_frame);
3152
3153     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3154         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3155         av_frame_free(&s->DPB[i].frame);
3156     }
3157
3158     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
3159         av_buffer_unref(&s->vps_list[i]);
3160     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
3161         av_buffer_unref(&s->sps_list[i]);
3162     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
3163         av_buffer_unref(&s->pps_list[i]);
3164     s->sps = NULL;
3165     s->pps = NULL;
3166     s->vps = NULL;
3167
3168     av_buffer_unref(&s->current_sps);
3169
3170     av_freep(&s->sh.entry_point_offset);
3171     av_freep(&s->sh.offset);
3172     av_freep(&s->sh.size);
3173
3174     for (i = 1; i < s->threads_number; i++) {
3175         lc = s->HEVClcList[i];
3176         if (lc) {
3177             av_freep(&s->HEVClcList[i]);
3178             av_freep(&s->sList[i]);
3179         }
3180     }
3181     if (s->HEVClc == s->HEVClcList[0])
3182         s->HEVClc = NULL;
3183     av_freep(&s->HEVClcList[0]);
3184
3185     for (i = 0; i < s->nals_allocated; i++)
3186         av_freep(&s->nals[i].rbsp_buffer);
3187     av_freep(&s->nals);
3188     s->nals_allocated = 0;
3189
3190     return 0;
3191 }
3192
3193 static av_cold int hevc_init_context(AVCodecContext *avctx)
3194 {
3195     HEVCContext *s = avctx->priv_data;
3196     int i;
3197
3198     s->avctx = avctx;
3199
3200     s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3201     if (!s->HEVClc)
3202         goto fail;
3203     s->HEVClcList[0] = s->HEVClc;
3204     s->sList[0] = s;
3205
3206     s->cabac_state = av_malloc(HEVC_CONTEXTS);
3207     if (!s->cabac_state)
3208         goto fail;
3209
3210     s->tmp_frame = av_frame_alloc();
3211     if (!s->tmp_frame)
3212         goto fail;
3213
3214     s->output_frame = av_frame_alloc();
3215     if (!s->output_frame)
3216         goto fail;
3217
3218     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3219         s->DPB[i].frame = av_frame_alloc();
3220         if (!s->DPB[i].frame)
3221             goto fail;
3222         s->DPB[i].tf.f = s->DPB[i].frame;
3223     }
3224
3225     s->max_ra = INT_MAX;
3226
3227     s->md5_ctx = av_md5_alloc();
3228     if (!s->md5_ctx)
3229         goto fail;
3230
3231     ff_bswapdsp_init(&s->bdsp);
3232
3233     s->context_initialized = 1;
3234     s->eos = 0;
3235
3236     return 0;
3237
3238 fail:
3239     hevc_decode_free(avctx);
3240     return AVERROR(ENOMEM);
3241 }
3242
3243 static int hevc_update_thread_context(AVCodecContext *dst,
3244                                       const AVCodecContext *src)
3245 {
3246     HEVCContext *s  = dst->priv_data;
3247     HEVCContext *s0 = src->priv_data;
3248     int i, ret;
3249
3250     if (!s->context_initialized) {
3251         ret = hevc_init_context(dst);
3252         if (ret < 0)
3253             return ret;
3254     }
3255
3256     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3257         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3258         if (s0->DPB[i].frame->buf[0]) {
3259             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3260             if (ret < 0)
3261                 return ret;
3262         }
3263     }
3264
3265     if (s->sps != s0->sps)
3266         s->sps = NULL;
3267     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3268         av_buffer_unref(&s->vps_list[i]);
3269         if (s0->vps_list[i]) {
3270             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3271             if (!s->vps_list[i])
3272                 return AVERROR(ENOMEM);
3273         }
3274     }
3275
3276     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3277         av_buffer_unref(&s->sps_list[i]);
3278         if (s0->sps_list[i]) {
3279             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3280             if (!s->sps_list[i])
3281                 return AVERROR(ENOMEM);
3282         }
3283     }
3284
3285     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3286         av_buffer_unref(&s->pps_list[i]);
3287         if (s0->pps_list[i]) {
3288             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3289             if (!s->pps_list[i])
3290                 return AVERROR(ENOMEM);
3291         }
3292     }
3293
3294     av_buffer_unref(&s->current_sps);
3295     if (s0->current_sps) {
3296         s->current_sps = av_buffer_ref(s0->current_sps);
3297         if (!s->current_sps)
3298             return AVERROR(ENOMEM);
3299     }
3300
3301     if (s->sps != s0->sps)
3302         ret = set_sps(s, s0->sps);
3303
3304     s->seq_decode = s0->seq_decode;
3305     s->seq_output = s0->seq_output;
3306     s->pocTid0    = s0->pocTid0;
3307     s->max_ra     = s0->max_ra;
3308     s->eos        = s0->eos;
3309
3310     s->is_nalff        = s0->is_nalff;
3311     s->nal_length_size = s0->nal_length_size;
3312
3313     s->threads_number      = s0->threads_number;
3314     s->threads_type        = s0->threads_type;
3315
3316     if (s0->eos) {
3317         s->seq_decode = (s->seq_decode + 1) & 0xff;
3318         s->max_ra = INT_MAX;
3319     }
3320
3321     return 0;
3322 }
3323
3324 static int hevc_decode_extradata(HEVCContext *s)
3325 {
3326     AVCodecContext *avctx = s->avctx;
3327     GetByteContext gb;
3328     int ret;
3329
3330     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3331
3332     if (avctx->extradata_size > 3 &&
3333         (avctx->extradata[0] || avctx->extradata[1] ||
3334          avctx->extradata[2] > 1)) {
3335         /* It seems the extradata is encoded as hvcC format.
3336          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3337          * is finalized. When finalized, configurationVersion will be 1 and we
3338          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3339         int i, j, num_arrays, nal_len_size;
3340
3341         s->is_nalff = 1;
3342
3343         bytestream2_skip(&gb, 21);
3344         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3345         num_arrays   = bytestream2_get_byte(&gb);
3346
3347         /* nal units in the hvcC always have length coded with 2 bytes,
3348          * so put a fake nal_length_size = 2 while parsing them */
3349         s->nal_length_size = 2;
3350
3351         /* Decode nal units from hvcC. */
3352         for (i = 0; i < num_arrays; i++) {
3353             int type = bytestream2_get_byte(&gb) & 0x3f;
3354             int cnt  = bytestream2_get_be16(&gb);
3355
3356             for (j = 0; j < cnt; j++) {
3357                 // +2 for the nal size field
3358                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3359                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3360                     av_log(s->avctx, AV_LOG_ERROR,
3361                            "Invalid NAL unit size in extradata.\n");
3362                     return AVERROR_INVALIDDATA;
3363                 }
3364
3365                 ret = decode_nal_units(s, gb.buffer, nalsize);
3366                 if (ret < 0) {
3367                     av_log(avctx, AV_LOG_ERROR,
3368                            "Decoding nal unit %d %d from hvcC failed\n",
3369                            type, i);
3370                     return ret;
3371                 }
3372                 bytestream2_skip(&gb, nalsize);
3373             }
3374         }
3375
3376         /* Now store right nal length size, that will be used to parse
3377          * all other nals */
3378         s->nal_length_size = nal_len_size;
3379     } else {
3380         s->is_nalff = 0;
3381         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3382         if (ret < 0)
3383             return ret;
3384     }
3385     return 0;
3386 }
3387
3388 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3389 {
3390     HEVCContext *s = avctx->priv_data;
3391     int ret;
3392
3393     ff_init_cabac_states();
3394
3395     avctx->internal->allocate_progress = 1;
3396
3397     ret = hevc_init_context(avctx);
3398     if (ret < 0)
3399         return ret;
3400
3401     s->enable_parallel_tiles = 0;
3402     s->picture_struct = 0;
3403
3404     if(avctx->active_thread_type & FF_THREAD_SLICE)
3405         s->threads_number = avctx->thread_count;
3406     else
3407         s->threads_number = 1;
3408
3409     if (avctx->extradata_size > 0 && avctx->extradata) {
3410         ret = hevc_decode_extradata(s);
3411         if (ret < 0) {
3412             hevc_decode_free(avctx);
3413             return ret;
3414         }
3415     }
3416
3417     if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3418             s->threads_type = FF_THREAD_FRAME;
3419         else
3420             s->threads_type = FF_THREAD_SLICE;
3421
3422     return 0;
3423 }
3424
3425 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3426 {
3427     HEVCContext *s = avctx->priv_data;
3428     int ret;
3429
3430     memset(s, 0, sizeof(*s));
3431
3432     ret = hevc_init_context(avctx);
3433     if (ret < 0)
3434         return ret;
3435
3436     return 0;
3437 }
3438
3439 static void hevc_decode_flush(AVCodecContext *avctx)
3440 {
3441     HEVCContext *s = avctx->priv_data;
3442     ff_hevc_flush_dpb(s);
3443     s->max_ra = INT_MAX;
3444 }
3445
/* Helpers for the option table: byte offset of a HEVCContext field, and the
 * flag set shared by every option. */
#define OFFSET(field) offsetof(HEVCContext, field)
#define PAR (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM)
3448
3449 static const AVProfile profiles[] = {
3450     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3451     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3452     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3453     { FF_PROFILE_HEVC_REXT,                 "Rext"  },
3454     { FF_PROFILE_UNKNOWN },
3455 };
3456
3457 static const AVOption options[] = {
3458     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3459         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3460     { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3461         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3462     { NULL },
3463 };
3464
3465 static const AVClass hevc_decoder_class = {
3466     .class_name = "HEVC decoder",
3467     .item_name  = av_default_item_name,
3468     .option     = options,
3469     .version    = LIBAVUTIL_VERSION_INT,
3470 };
3471
3472 AVCodec ff_hevc_decoder = {
3473     .name                  = "hevc",
3474     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3475     .type                  = AVMEDIA_TYPE_VIDEO,
3476     .id                    = AV_CODEC_ID_HEVC,
3477     .priv_data_size        = sizeof(HEVCContext),
3478     .priv_class            = &hevc_decoder_class,
3479     .init                  = hevc_decode_init,
3480     .close                 = hevc_decode_free,
3481     .decode                = hevc_decode_frame,
3482     .flush                 = hevc_decode_flush,
3483     .update_thread_context = hevc_update_thread_context,
3484     .init_thread_copy      = hevc_init_thread_copy,
3485     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3486                              CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
3487     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
3488 };