git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c

   1 /*
   2  * HEVC video Decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of FFmpeg.
  10  *
  11  * FFmpeg is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * FFmpeg is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with FFmpeg; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/atomic.h"
  27 #include "libavutil/attributes.h"
  28 #include "libavutil/common.h"
  29 #include "libavutil/display.h"
  30 #include "libavutil/internal.h"
  31 #include "libavutil/md5.h"
  32 #include "libavutil/opt.h"
  33 #include "libavutil/pixdesc.h"
  34 #include "libavutil/stereo3d.h"
  35
  36 #include "bswapdsp.h"
  37 #include "bytestream.h"
  38 #include "cabac_functions.h"
  39 #include "golomb.h"
  40 #include "hevc.h"
  41
  42 const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
  43
  44 /**
  45  * NOTE: Each function hls_foo corresponds to the function foo in the
  46  * specification (HLS stands for High Level Syntax).
  47  */
  48
  49 /**
  50  * Section 5.7
  51  */
  52
  53 /* free everything allocated  by pic_arrays_init() */
  54 static void pic_arrays_free(HEVCContext *s)
  55 {
  56     av_freep(&s->sao);
  57     av_freep(&s->deblock);
  58
  59     av_freep(&s->skip_flag);
  60     av_freep(&s->tab_ct_depth);
  61
  62     av_freep(&s->tab_ipm);
  63     av_freep(&s->cbf_luma);
  64     av_freep(&s->is_pcm);
  65
  66     av_freep(&s->qp_y_tab);
  67     av_freep(&s->tab_slice_address);
  68     av_freep(&s->filter_slice_edges);
  69
  70     av_freep(&s->horizontal_bs);
  71     av_freep(&s->vertical_bs);
  72
  73     av_freep(&s->sh.entry_point_offset);
  74     av_freep(&s->sh.size);
  75     av_freep(&s->sh.offset);
  76
  77     av_buffer_pool_uninit(&s->tab_mvf_pool);
  78     av_buffer_pool_uninit(&s->rpl_tab_pool);
  79 }
  80
  81 /* allocate arrays that depend on frame dimensions */
  82 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
  83 {
  84     int log2_min_cb_size = sps->log2_min_cb_size;
  85     int width            = sps->width;
  86     int height           = sps->height;
  87     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
  88                            ((height >> log2_min_cb_size) + 1);
  89     int ctb_count        = sps->ctb_width * sps->ctb_height;
  90     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
  91
  92     s->bs_width  = width  >> 3;
  93     s->bs_height = height >> 3;
  94
  95     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
  96     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
  97     if (!s->sao || !s->deblock)
  98         goto fail;
  99
 100     s->skip_flag    = av_malloc(pic_size_in_ctb);
 101     s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
 102     if (!s->skip_flag || !s->tab_ct_depth)
 103         goto fail;
 104
 105     s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
 106     s->tab_ipm  = av_mallocz(min_pu_size);
 107     s->is_pcm   = av_malloc(min_pu_size);
 108     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 109         goto fail;
 110
 111     s->filter_slice_edges = av_malloc(ctb_count);
 112     s->tab_slice_address  = av_malloc_array(pic_size_in_ctb,
 113                                       sizeof(*s->tab_slice_address));
 114     s->qp_y_tab           = av_malloc_array(pic_size_in_ctb,
 115                                       sizeof(*s->qp_y_tab));
 116     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 117         goto fail;
 118
 119     s->horizontal_bs = av_mallocz_array(2 * s->bs_width, (s->bs_height + 1));
 120     s->vertical_bs   = av_mallocz_array(2 * s->bs_width, (s->bs_height + 1));
 121     if (!s->horizontal_bs || !s->vertical_bs)
 122         goto fail;
 123
 124     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 125                                           av_buffer_allocz);
 126     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 127                                           av_buffer_allocz);
 128     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 129         goto fail;
 130
 131     return 0;
 132
 133 fail:
 134     pic_arrays_free(s);
 135     return AVERROR(ENOMEM);
 136 }
 137
 138 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 139 {
 140     int i = 0;
 141     int j = 0;
 142     uint8_t luma_weight_l0_flag[16];
 143     uint8_t chroma_weight_l0_flag[16];
 144     uint8_t luma_weight_l1_flag[16];
 145     uint8_t chroma_weight_l1_flag[16];
 146
 147     s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
 148     if (s->sps->chroma_format_idc != 0) {
 149         int delta = get_se_golomb(gb);
 150         s->sh.chroma_log2_weight_denom = av_clip(s->sh.luma_log2_weight_denom + delta, 0, 7);
 151     }
 152
 153     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 154         luma_weight_l0_flag[i] = get_bits1(gb);
 155         if (!luma_weight_l0_flag[i]) {
 156             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 157             s->sh.luma_offset_l0[i] = 0;
 158         }
 159     }
 160     if (s->sps->chroma_format_idc != 0) {
 161         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 162             chroma_weight_l0_flag[i] = get_bits1(gb);
 163     } else {
 164         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 165             chroma_weight_l0_flag[i] = 0;
 166     }
 167     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 168         if (luma_weight_l0_flag[i]) {
 169             int delta_luma_weight_l0 = get_se_golomb(gb);
 170             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 171             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 172         }
 173         if (chroma_weight_l0_flag[i]) {
 174             for (j = 0; j < 2; j++) {
 175                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 176                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 177                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 178                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 179                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 180             }
 181         } else {
 182             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 183             s->sh.chroma_offset_l0[i][0] = 0;
 184             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 185             s->sh.chroma_offset_l0[i][1] = 0;
 186         }
 187     }
 188     if (s->sh.slice_type == B_SLICE) {
 189         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 190             luma_weight_l1_flag[i] = get_bits1(gb);
 191             if (!luma_weight_l1_flag[i]) {
 192                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 193                 s->sh.luma_offset_l1[i] = 0;
 194             }
 195         }
 196         if (s->sps->chroma_format_idc != 0) {
 197             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 198                 chroma_weight_l1_flag[i] = get_bits1(gb);
 199         } else {
 200             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 201                 chroma_weight_l1_flag[i] = 0;
 202         }
 203         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 204             if (luma_weight_l1_flag[i]) {
 205                 int delta_luma_weight_l1 = get_se_golomb(gb);
 206                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 207                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 208             }
 209             if (chroma_weight_l1_flag[i]) {
 210                 for (j = 0; j < 2; j++) {
 211                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 212                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 213                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 214                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 215                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 216                 }
 217             } else {
 218                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 219                 s->sh.chroma_offset_l1[i][0] = 0;
 220                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 221                 s->sh.chroma_offset_l1[i][1] = 0;
 222             }
 223         }
 224     }
 225 }
 226
 227 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 228 {
 229     const HEVCSPS *sps = s->sps;
 230     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 231     int prev_delta_msb = 0;
 232     unsigned int nb_sps = 0, nb_sh;
 233     int i;
 234
 235     rps->nb_refs = 0;
 236     if (!sps->long_term_ref_pics_present_flag)
 237         return 0;
 238
 239     if (sps->num_long_term_ref_pics_sps > 0)
 240         nb_sps = get_ue_golomb_long(gb);
 241     nb_sh = get_ue_golomb_long(gb);
 242
 243     if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
 244         return AVERROR_INVALIDDATA;
 245
 246     rps->nb_refs = nb_sh + nb_sps;
 247
 248     for (i = 0; i < rps->nb_refs; i++) {
 249         uint8_t delta_poc_msb_present;
 250
 251         if (i < nb_sps) {
 252             uint8_t lt_idx_sps = 0;
 253
 254             if (sps->num_long_term_ref_pics_sps > 1)
 255                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 256
 257             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 258             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 259         } else {
 260             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 261             rps->used[i] = get_bits1(gb);
 262         }
 263
 264         delta_poc_msb_present = get_bits1(gb);
 265         if (delta_poc_msb_present) {
 266             int delta = get_ue_golomb_long(gb);
 267
 268             if (i && i != nb_sps)
 269                 delta += prev_delta_msb;
 270
 271             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 272             prev_delta_msb = delta;
 273         }
 274     }
 275
 276     return 0;
 277 }
 278
 279 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
 280 {
 281     int ret;
 282     unsigned int num = 0, den = 0;
 283
 284     pic_arrays_free(s);
 285     ret = pic_arrays_init(s, sps);
 286     if (ret < 0)
 287         goto fail;
 288
 289     s->avctx->coded_width         = sps->width;
 290     s->avctx->coded_height        = sps->height;
 291     s->avctx->width               = sps->output_width;
 292     s->avctx->height              = sps->output_height;
 293     s->avctx->pix_fmt             = sps->pix_fmt;
 294     s->avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 295
 296     ff_set_sar(s->avctx, sps->vui.sar);
 297
 298     if (sps->vui.video_signal_type_present_flag)
 299         s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 300                                                                : AVCOL_RANGE_MPEG;
 301     else
 302         s->avctx->color_range = AVCOL_RANGE_MPEG;
 303
 304     if (sps->vui.colour_description_present_flag) {
 305         s->avctx->color_primaries = sps->vui.colour_primaries;
 306         s->avctx->color_trc       = sps->vui.transfer_characteristic;
 307         s->avctx->colorspace      = sps->vui.matrix_coeffs;
 308     } else {
 309         s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 310         s->avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 311         s->avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 312     }
 313
 314     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 315     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 316     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 317
 318     if (sps->sao_enabled) {
 319         av_frame_unref(s->tmp_frame);
 320         ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
 321         if (ret < 0)
 322             goto fail;
 323         s->frame = s->tmp_frame;
 324     }
 325
 326     s->sps = sps;
 327     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
 328
 329     if (s->vps->vps_timing_info_present_flag) {
 330         num = s->vps->vps_num_units_in_tick;
 331         den = s->vps->vps_time_scale;
 332     } else if (sps->vui.vui_timing_info_present_flag) {
 333         num = sps->vui.vui_num_units_in_tick;
 334         den = sps->vui.vui_time_scale;
 335     }
 336
 337     if (num != 0 && den != 0)
 338         av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
 339                   num, den, 1 << 30);
 340
 341     return 0;
 342
 343 fail:
 344     pic_arrays_free(s);
 345     s->sps = NULL;
 346     return ret;
 347 }
 348
 349 static int is_sps_exist(HEVCContext *s, const HEVCSPS* last_sps)
 350 {
 351     int i;
 352
 353     for( i = 0; i < MAX_SPS_COUNT; i++)
 354         if(s->sps_list[i])
 355             if (last_sps == (HEVCSPS*)s->sps_list[i]->data)
 356                 return 1;
 357     return 0;
 358 }
 359
 360 static int hls_slice_header(HEVCContext *s)
 361 {
 362     GetBitContext *gb = &s->HEVClc->gb;
 363     SliceHeader *sh   = &s->sh;
 364     int i, j, ret;
 365
 366     // Coded parameters
 367     sh->first_slice_in_pic_flag = get_bits1(gb);
 368     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 369         s->seq_decode = (s->seq_decode + 1) & 0xff;
 370         s->max_ra     = INT_MAX;
 371         if (IS_IDR(s))
 372             ff_hevc_clear_refs(s);
 373     }
 374     sh->no_output_of_prior_pics_flag = 0;
 375     if (IS_IRAP(s))
 376         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 377     if (s->nal_unit_type == NAL_CRA_NUT && s->last_eos == 1)
 378         sh->no_output_of_prior_pics_flag = 1;
 379
 380     sh->pps_id = get_ue_golomb_long(gb);
 381     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
 382         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 383         return AVERROR_INVALIDDATA;
 384     }
 385     if (!sh->first_slice_in_pic_flag &&
 386         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
 387         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 388         return AVERROR_INVALIDDATA;
 389     }
 390     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
 391
 392     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
 393         const HEVCSPS* last_sps = s->sps;
 394         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
 395         if (last_sps) {
 396             if (is_sps_exist(s, last_sps)) {
 397                 if (s->sps->width !=  last_sps->width || s->sps->height != last_sps->height ||
 398                         s->sps->temporal_layer[s->sps->max_sub_layers - 1].max_dec_pic_buffering != last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
 399                     sh->no_output_of_prior_pics_flag = 0;
 400             } else
 401                 sh->no_output_of_prior_pics_flag = 0;
 402         }
 403         ff_hevc_clear_refs(s);
 404         ret = set_sps(s, s->sps);
 405         if (ret < 0)
 406             return ret;
 407
 408         s->seq_decode = (s->seq_decode + 1) & 0xff;
 409         s->max_ra     = INT_MAX;
 410     }
 411
 412     s->avctx->profile = s->sps->ptl.general_ptl.profile_idc;
 413     s->avctx->level   = s->sps->ptl.general_ptl.level_idc;
 414
 415     sh->dependent_slice_segment_flag = 0;
 416     if (!sh->first_slice_in_pic_flag) {
 417         int slice_address_length;
 418
 419         if (s->pps->dependent_slice_segments_enabled_flag)
 420             sh->dependent_slice_segment_flag = get_bits1(gb);
 421
 422         slice_address_length = av_ceil_log2(s->sps->ctb_width *
 423                                             s->sps->ctb_height);
 424         sh->slice_segment_addr = get_bits(gb, slice_address_length);
 425         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
 426             av_log(s->avctx, AV_LOG_ERROR,
 427                    "Invalid slice segment address: %u.\n",
 428                    sh->slice_segment_addr);
 429             return AVERROR_INVALIDDATA;
 430         }
 431
 432         if (!sh->dependent_slice_segment_flag) {
 433             sh->slice_addr = sh->slice_segment_addr;
 434             s->slice_idx++;
 435         }
 436     } else {
 437         sh->slice_segment_addr = sh->slice_addr = 0;
 438         s->slice_idx           = 0;
 439         s->slice_initialized   = 0;
 440     }
 441
 442     if (!sh->dependent_slice_segment_flag) {
 443         s->slice_initialized = 0;
 444
 445         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
 446             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 447
 448         sh->slice_type = get_ue_golomb_long(gb);
 449         if (!(sh->slice_type == I_SLICE ||
 450               sh->slice_type == P_SLICE ||
 451               sh->slice_type == B_SLICE)) {
 452             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 453                    sh->slice_type);
 454             return AVERROR_INVALIDDATA;
 455         }
 456         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
 457             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 458             return AVERROR_INVALIDDATA;
 459         }
 460
 461         // when flag is not present, picture is inferred to be output
 462         sh->pic_output_flag = 1;
 463         if (s->pps->output_flag_present_flag)
 464             sh->pic_output_flag = get_bits1(gb);
 465
 466         if (s->sps->separate_colour_plane_flag)
 467             sh->colour_plane_id = get_bits(gb, 2);
 468
 469         if (!IS_IDR(s)) {
 470             int short_term_ref_pic_set_sps_flag, poc;
 471
 472             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
 473             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
 474             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 475                 av_log(s->avctx, AV_LOG_WARNING,
 476                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 477                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 478                     return AVERROR_INVALIDDATA;
 479                 poc = s->poc;
 480             }
 481             s->poc = poc;
 482
 483             short_term_ref_pic_set_sps_flag = get_bits1(gb);
 484             if (!short_term_ref_pic_set_sps_flag) {
 485                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
 486                 if (ret < 0)
 487                     return ret;
 488
 489                 sh->short_term_rps = &sh->slice_rps;
 490             } else {
 491                 int numbits, rps_idx;
 492
 493                 if (!s->sps->nb_st_rps) {
 494                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 495                     return AVERROR_INVALIDDATA;
 496                 }
 497
 498                 numbits = av_ceil_log2(s->sps->nb_st_rps);
 499                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 500                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
 501             }
 502
 503             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 504             if (ret < 0) {
 505                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 506                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 507                     return AVERROR_INVALIDDATA;
 508             }
 509
 510             if (s->sps->sps_temporal_mvp_enabled_flag)
 511                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 512             else
 513                 sh->slice_temporal_mvp_enabled_flag = 0;
 514         } else {
 515             s->sh.short_term_rps = NULL;
 516             s->poc               = 0;
 517         }
 518
 519         /* 8.3.1 */
 520         if (s->temporal_id == 0 &&
 521             s->nal_unit_type != NAL_TRAIL_N &&
 522             s->nal_unit_type != NAL_TSA_N   &&
 523             s->nal_unit_type != NAL_STSA_N  &&
 524             s->nal_unit_type != NAL_RADL_N  &&
 525             s->nal_unit_type != NAL_RADL_R  &&
 526             s->nal_unit_type != NAL_RASL_N  &&
 527             s->nal_unit_type != NAL_RASL_R)
 528             s->pocTid0 = s->poc;
 529
 530         if (s->sps->sao_enabled) {
 531             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 532             sh->slice_sample_adaptive_offset_flag[1] =
 533             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 534         } else {
 535             sh->slice_sample_adaptive_offset_flag[0] = 0;
 536             sh->slice_sample_adaptive_offset_flag[1] = 0;
 537             sh->slice_sample_adaptive_offset_flag[2] = 0;
 538         }
 539
 540         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 541         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
 542             int nb_refs;
 543
 544             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
 545             if (sh->slice_type == B_SLICE)
 546                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
 547
 548             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 549                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 550                 if (sh->slice_type == B_SLICE)
 551                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 552             }
 553             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
 554                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 555                        sh->nb_refs[L0], sh->nb_refs[L1]);
 556                 return AVERROR_INVALIDDATA;
 557             }
 558
 559             sh->rpl_modification_flag[0] = 0;
 560             sh->rpl_modification_flag[1] = 0;
 561             nb_refs = ff_hevc_frame_nb_refs(s);
 562             if (!nb_refs) {
 563                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 564                 return AVERROR_INVALIDDATA;
 565             }
 566
 567             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
 568                 sh->rpl_modification_flag[0] = get_bits1(gb);
 569                 if (sh->rpl_modification_flag[0]) {
 570                     for (i = 0; i < sh->nb_refs[L0]; i++)
 571                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 572                 }
 573
 574                 if (sh->slice_type == B_SLICE) {
 575                     sh->rpl_modification_flag[1] = get_bits1(gb);
 576                     if (sh->rpl_modification_flag[1] == 1)
 577                         for (i = 0; i < sh->nb_refs[L1]; i++)
 578                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 579                 }
 580             }
 581
 582             if (sh->slice_type == B_SLICE)
 583                 sh->mvd_l1_zero_flag = get_bits1(gb);
 584
 585             if (s->pps->cabac_init_present_flag)
 586                 sh->cabac_init_flag = get_bits1(gb);
 587             else
 588                 sh->cabac_init_flag = 0;
 589
 590             sh->collocated_ref_idx = 0;
 591             if (sh->slice_temporal_mvp_enabled_flag) {
 592                 sh->collocated_list = L0;
 593                 if (sh->slice_type == B_SLICE)
 594                     sh->collocated_list = !get_bits1(gb);
 595
 596                 if (sh->nb_refs[sh->collocated_list] > 1) {
 597                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 598                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 599                         av_log(s->avctx, AV_LOG_ERROR,
 600                                "Invalid collocated_ref_idx: %d.\n",
 601                                sh->collocated_ref_idx);
 602                         return AVERROR_INVALIDDATA;
 603                     }
 604                 }
 605             }
 606
 607             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
 608                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
 609                 pred_weight_table(s, gb);
 610             }
 611
 612             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 613             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 614                 av_log(s->avctx, AV_LOG_ERROR,
 615                        "Invalid number of merging MVP candidates: %d.\n",
 616                        sh->max_num_merge_cand);
 617                 return AVERROR_INVALIDDATA;
 618             }
 619         }
 620
 621         sh->slice_qp_delta = get_se_golomb(gb);
 622
 623         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 624             sh->slice_cb_qp_offset = get_se_golomb(gb);
 625             sh->slice_cr_qp_offset = get_se_golomb(gb);
 626         } else {
 627             sh->slice_cb_qp_offset = 0;
 628             sh->slice_cr_qp_offset = 0;
 629         }
 630
 631         if (s->pps->deblocking_filter_control_present_flag) {
 632             int deblocking_filter_override_flag = 0;
 633
 634             if (s->pps->deblocking_filter_override_enabled_flag)
 635                 deblocking_filter_override_flag = get_bits1(gb);
 636
 637             if (deblocking_filter_override_flag) {
 638                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 639                 if (!sh->disable_deblocking_filter_flag) {
 640                     sh->beta_offset = get_se_golomb(gb) * 2;
 641                     sh->tc_offset   = get_se_golomb(gb) * 2;
 642                 }
 643             } else {
 644                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
 645                 sh->beta_offset                    = s->pps->beta_offset;
 646                 sh->tc_offset                      = s->pps->tc_offset;
 647             }
 648         } else {
 649             sh->disable_deblocking_filter_flag = 0;
 650             sh->beta_offset                    = 0;
 651             sh->tc_offset                      = 0;
 652         }
 653
 654         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
 655             (sh->slice_sample_adaptive_offset_flag[0] ||
 656              sh->slice_sample_adaptive_offset_flag[1] ||
 657              !sh->disable_deblocking_filter_flag)) {
 658             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 659         } else {
 660             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
 661         }
 662     } else if (!s->slice_initialized) {
 663         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 664         return AVERROR_INVALIDDATA;
 665     }
 666
 667     sh->num_entry_point_offsets = 0;
 668     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
 669         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
 670         if (sh->num_entry_point_offsets > 0) {
 671             int offset_len = get_ue_golomb_long(gb) + 1;
 672             int segments = offset_len >> 4;
 673             int rest = (offset_len & 15);
 674             av_freep(&sh->entry_point_offset);
 675             av_freep(&sh->offset);
 676             av_freep(&sh->size);
 677             sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 678             sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 679             sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 680             if (!sh->entry_point_offset || !sh->offset || !sh->size) {
 681                 sh->num_entry_point_offsets = 0;
 682                 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
 683                 return AVERROR(ENOMEM);
 684             }
 685             for (i = 0; i < sh->num_entry_point_offsets; i++) {
 686                 int val = 0;
 687                 for (j = 0; j < segments; j++) {
 688                     val <<= 16;
 689                     val += get_bits(gb, 16);
 690                 }
 691                 if (rest) {
 692                     val <<= rest;
 693                     val += get_bits(gb, rest);
 694                 }
 695                 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
 696             }
 697             if (s->threads_number > 1 && (s->pps->num_tile_rows > 1 || s->pps->num_tile_columns > 1)) {
 698                 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
 699                 s->threads_number = 1;
 700             } else
 701                 s->enable_parallel_tiles = 0;
 702         } else
 703             s->enable_parallel_tiles = 0;
 704     }
 705
 706     if (s->pps->slice_header_extension_present_flag) {
 707         unsigned int length = get_ue_golomb_long(gb);
 708         if (length*8LL > get_bits_left(gb)) {
 709             av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
 710             return AVERROR_INVALIDDATA;
 711         }
 712         for (i = 0; i < length; i++)
 713             skip_bits(gb, 8);  // slice_header_extension_data_byte
 714     }
 715
 716     // Inferred parameters
 717     sh->slice_qp = 26U + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 718     if (sh->slice_qp > 51 ||
 719         sh->slice_qp < -s->sps->qp_bd_offset) {
 720         av_log(s->avctx, AV_LOG_ERROR,
 721                "The slice_qp %d is outside the valid range "
 722                "[%d, 51].\n",
 723                sh->slice_qp,
 724                -s->sps->qp_bd_offset);
 725         return AVERROR_INVALIDDATA;
 726     }
 727
 728     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 729
 730     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
 731         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
 732         return AVERROR_INVALIDDATA;
 733     }
 734
 735     s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
 736
 737     if (!s->pps->cu_qp_delta_enabled_flag)
 738         s->HEVClc->qp_y = s->sh.slice_qp;
 739
 740     s->slice_initialized = 1;
 741
 742     return 0;
 743 }
 744
 745 #define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
 746
 747 #define SET_SAO(elem, value)                            \
 748 do {                                                    \
 749     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 750         sao->elem = value;                              \
 751     else if (sao_merge_left_flag)                       \
 752         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 753     else if (sao_merge_up_flag)                         \
 754         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 755     else                                                \
 756         sao->elem = 0;                                  \
 757 } while (0)
 758
 759 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 760 {
 761     HEVCLocalContext *lc    = s->HEVClc;
 762     int sao_merge_left_flag = 0;
 763     int sao_merge_up_flag   = 0;
 764     SAOParams *sao          = &CTB(s->sao, rx, ry);
 765     int c_idx, i;
 766
 767     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 768         s->sh.slice_sample_adaptive_offset_flag[1]) {
 769         if (rx > 0) {
 770             if (lc->ctb_left_flag)
 771                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 772         }
 773         if (ry > 0 && !sao_merge_left_flag) {
 774             if (lc->ctb_up_flag)
 775                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 776         }
 777     }
 778
 779     for (c_idx = 0; c_idx < 3; c_idx++) {
 780         int log2_sao_offset_scale = c_idx == 0 ? s->pps->log2_sao_offset_scale_luma :
 781                                                  s->pps->log2_sao_offset_scale_chroma;
 782
 783         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 784             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 785             continue;
 786         }
 787
 788         if (c_idx == 2) {
 789             sao->type_idx[2] = sao->type_idx[1];
 790             sao->eo_class[2] = sao->eo_class[1];
 791         } else {
 792             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 793         }
 794
 795         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 796             continue;
 797
 798         for (i = 0; i < 4; i++)
 799             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 800
 801         if (sao->type_idx[c_idx] == SAO_BAND) {
 802             for (i = 0; i < 4; i++) {
 803                 if (sao->offset_abs[c_idx][i]) {
 804                     SET_SAO(offset_sign[c_idx][i],
 805                             ff_hevc_sao_offset_sign_decode(s));
 806                 } else {
 807                     sao->offset_sign[c_idx][i] = 0;
 808                 }
 809             }
 810             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 811         } else if (c_idx != 2) {
 812             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 813         }
 814
 815         // Inferred parameters
 816         sao->offset_val[c_idx][0] = 0;
 817         for (i = 0; i < 4; i++) {
 818             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
 819             if (sao->type_idx[c_idx] == SAO_EDGE) {
 820                 if (i > 1)
 821                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 822             } else if (sao->offset_sign[c_idx][i]) {
 823                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 824             }
 825             sao->offset_val[c_idx][i + 1] <<= log2_sao_offset_scale;
 826         }
 827     }
 828 }
 829
 830 #undef SET_SAO
 831 #undef CTB
 832
     /**
      * Decode a single transform unit.
      *
      * For intra CUs the luma block is predicted first. If any coded block flag
      * (luma, Cb or Cr) is set, cu_qp_delta is parsed when still pending and the
      * residuals are decoded through ff_hevc_hls_residual_coding(), with chroma
      * intra prediction done per plane right before its residual. If no flag is
      * set, only the remaining chroma intra prediction is performed.
      *
      * @param s               HEVC decoding context
      * @param x0              x position passed to the luma prediction/residual calls
      * @param y0              y position passed to the luma prediction/residual calls
      * @param xBase           x position used for chroma on the blk_idx == 3 path
      * @param yBase           y position used for chroma on the blk_idx == 3 path
      * @param cb_xBase        forwarded to ff_hevc_set_qPy()
      * @param cb_yBase        forwarded to ff_hevc_set_qPy()
      * @param log2_cb_size    forwarded to ff_hevc_set_qPy()
      * @param log2_trafo_size log2 of the luma transform size
      * @param trafo_depth     selects which lc->tt.cbf_cb/cbf_cr level is read
      * @param blk_idx         index of this unit among its siblings
      *                        (hls_transform_tree passes 0..3 for the quadrants)
      * @return 0 on success, AVERROR_INVALIDDATA if cu_qp_delta is out of range
      *
      * NOTE(review): results of ff_hevc_hls_residual_coding(), if it returns
      * any, are not checked in this function.
      */
 833 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
 834                               int xBase, int yBase, int cb_xBase, int cb_yBase,
 835                               int log2_cb_size, int log2_trafo_size,
 836                               int trafo_depth, int blk_idx)
 837 {
 838     HEVCLocalContext *lc = s->HEVClc;
         /* chroma transform size, reduced by the horizontal chroma shift */
 839     const int log2_trafo_size_c = log2_trafo_size - s->sps->hshift[1];
 840     int i;
 841
         /* Luma intra prediction happens up front, whether or not a residual
          * is coded for this unit. */
 842     if (lc->cu.pred_mode == MODE_INTRA) {
 843         int trafo_size = 1 << log2_trafo_size;
 844         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
 845
 846         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
 847     }
 848
         /* At least one coded block flag is set. For chroma_format_idc == 2 the
          * flags of a second chroma block, located 1 << log2_trafo_size_c rows
          * further down, are checked as well. */
 849     if (lc->tt.cbf_luma ||
 850         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
 851         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) ||
 852         (s->sps->chroma_format_idc == 2 &&
 853          (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (1 << log2_trafo_size_c)) ||
 854          SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (1 << log2_trafo_size_c))))) {
 855         int scan_idx   = SCAN_DIAG;
 856         int scan_idx_c = SCAN_DIAG;
 857         int cbf_luma = lc->tt.cbf_luma;
             /* NOTE(review): cbf_chroma is computed but not read again in this
              * function. */
 858         int cbf_chroma = SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
 859                          SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) ||
 860                          (s->sps->chroma_format_idc == 2 &&
 861                          (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (1 << log2_trafo_size_c)) ||
 862                          SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (1 << log2_trafo_size_c))));
 863
             /* cu_qp_delta is parsed only while is_cu_qp_delta_coded is unset;
              * the flag is raised here so later units skip this step. */
 864         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
 865             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
 866             if (lc->tu.cu_qp_delta != 0)
 867                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
 868                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
 869             lc->tu.is_cu_qp_delta_coded = 1;
 870
                 /* Reject deltas outside a range that widens with qp_bd_offset. */
 871             if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
 872                 lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
 873                 av_log(s->avctx, AV_LOG_ERROR,
 874                        "The cu_qp_delta %d is outside the valid range "
 875                        "[%d, %d].\n",
 876                        lc->tu.cu_qp_delta,
 877                        -(26 + s->sps->qp_bd_offset / 2),
 878                         (25 + s->sps->qp_bd_offset / 2));
 879                 return AVERROR_INVALIDDATA;
 880             }
 881
 882             ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
 883         }
 884
             /* For intra units with log2_trafo_size < 4 the coefficient scan
              * follows the prediction mode: modes 6..14 select the vertical
              * scan, modes 22..30 the horizontal one, anything else keeps the
              * diagonal default. Luma and chroma are decided independently. */
 885         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
 886             if (lc->tu.intra_pred_mode >= 6 &&
 887                 lc->tu.intra_pred_mode <= 14) {
 888                 scan_idx = SCAN_VERT;
 889             } else if (lc->tu.intra_pred_mode >= 22 &&
 890                        lc->tu.intra_pred_mode <= 30) {
 891                 scan_idx = SCAN_HORIZ;
 892             }
 893
 894             if (lc->tu.intra_pred_mode_c >=  6 &&
 895                 lc->tu.intra_pred_mode_c <= 14) {
 896                 scan_idx_c = SCAN_VERT;
 897             } else if (lc->tu.intra_pred_mode_c >= 22 &&
 898                        lc->tu.intra_pred_mode_c <= 30) {
 899                 scan_idx_c = SCAN_HORIZ;
 900             }
 901         }
 902
 903
 904         if (cbf_luma)
 905             ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
             /* Chroma is handled together with this unit unless the luma size
              * is 4x4 (log2 == 2) and chroma_format_idc != 3; in that case it
              * is deferred to the sibling with blk_idx == 3 and processed once
              * at (xBase, yBase). */
 906         if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
 907             int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
 908             int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
 909
                 /* Plane 1: with chroma_format_idc == 2 two blocks stacked
                  * vertically are processed, otherwise one. */
 910             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
 911                 if (lc->cu.pred_mode == MODE_INTRA) {
 912                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
 913                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
 914                 }
 915                 if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (i << log2_trafo_size_c)))
 916                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
 917                                                 log2_trafo_size_c, scan_idx_c, 1);
 918             }
 919
                 /* Plane 2: same procedure, driven by the cbf_cr flags. */
 920             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
 921                 if (lc->cu.pred_mode == MODE_INTRA) {
 922                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
 923                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
 924                 }
 925                 if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (i << log2_trafo_size_c)))
 926                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
 927                                                 log2_trafo_size_c, scan_idx_c, 2);
 928             }
 929         } else if (blk_idx == 3) {
                 /* Deferred chroma: uses log2_trafo_size (not the _c variant) as
                  * the chroma block size and (xBase, yBase) as its position. */
 930             int trafo_size_h = 1 << (log2_trafo_size + 1);
 931             int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
 932             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
 933                 if (lc->cu.pred_mode == MODE_INTRA) {
 934                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
 935                                                     trafo_size_h, trafo_size_v);
 936                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
 937                 }
                     /* NOTE(review): the cbf lookup offsets by log2_trafo_size_c
                      * while prediction/residual offset by log2_trafo_size;
                      * presumably intentional, confirm against SAMPLE_CBF. */
 938                 if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], xBase, yBase + (i << log2_trafo_size_c)))
 939                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
 940                                                 log2_trafo_size, scan_idx_c, 1);
 941             }
 942             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
 943                 if (lc->cu.pred_mode == MODE_INTRA) {
 944                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
 945                                                 trafo_size_h, trafo_size_v);
 946                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
 947                 }
 948                 if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], xBase, yBase + (i << log2_trafo_size_c)))
 949                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
 950                                                 log2_trafo_size, scan_idx_c, 2);
 951             }
 952         }
 953     } else if (lc->cu.pred_mode == MODE_INTRA) {
             /* No coded block flag is set: only chroma intra prediction is left
              * (luma was predicted at the top). The same "with this unit" versus
              * "deferred to blk_idx == 3" split as above applies. */
 954         if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
 955             int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
 956             int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
 957             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
 958             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
 959             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
 960             if (s->sps->chroma_format_idc == 2) {
 961                 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
 962                                                 trafo_size_h, trafo_size_v);
 963                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
 964                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
 965             }
 966         } else if (blk_idx == 3) {
 967             int trafo_size_h = 1 << (log2_trafo_size + 1);
 968             int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
 969             ff_hevc_set_neighbour_available(s, xBase, yBase,
 970                                             trafo_size_h, trafo_size_v);
 971             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
 972             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
 973             if (s->sps->chroma_format_idc == 2) {
 974                 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
 975                                                 trafo_size_h, trafo_size_v);
 976                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
 977                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
 978             }
 979         }
 980     }
 981
 982     return 0;
 983 }
 984
 985 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
 986 {
 987     int cb_size          = 1 << log2_cb_size;
 988     int log2_min_pu_size = s->sps->log2_min_pu_size;
 989
 990     int min_pu_width     = s->sps->min_pu_width;
 991     int x_end = FFMIN(x0 + cb_size, s->sps->width);
 992     int y_end = FFMIN(y0 + cb_size, s->sps->height);
 993     int i, j;
 994
 995     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
 996         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
 997             s->is_pcm[i + j * min_pu_width] = 2;
 998 }
 999
     /**
      * Parse one node of the transform tree and everything below it.
      *
      * The node's chroma coded block flags and split_transform_flag are parsed
      * or inferred; a split node recurses into its four quadrants, a leaf
      * parses cbf_luma and hands over to hls_transform_unit().
      *
      * @param s               HEVC decoding context
      * @param x0              x position of this node
      * @param y0              y position of this node
      * @param xBase           x position of the parent node (the recursion passes
      *                        the parent's x0)
      * @param yBase           y position of the parent node (the recursion passes
      *                        the parent's y0)
      * @param cb_xBase        passed through unchanged to hls_transform_unit()
      * @param cb_yBase        passed through unchanged to hls_transform_unit()
      * @param log2_cb_size    passed through unchanged to hls_transform_unit()
      * @param log2_trafo_size log2 of this node's size; decremented per level
      * @param trafo_depth     depth of this node; incremented per level
      * @param blk_idx         index (0..3) of this node among its siblings
      * @return 0 on success, otherwise the negative value returned by a
      *         recursive call or by hls_transform_unit()
      */
1000 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1001                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1002                               int log2_cb_size, int log2_trafo_size,
1003                               int trafo_depth, int blk_idx)
1004 {
1005     HEVCLocalContext *lc = s->HEVClc;
1006     uint8_t split_transform_flag;
1007     int ret;
1008
         /* Initialise this node's chroma flags: a 4x4 node below the root
          * starts from the parent's flags (read at xBase, yBase one depth up),
          * every other node starts from 0. Both may be overwritten by the
          * parsing further down. */
1009     if (trafo_depth > 0 && log2_trafo_size == 2) {
1010         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1011             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase);
1012         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1013             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase);
1014         if (s->sps->chroma_format_idc == 2) {
                 /* NOTE(review): the parent's second chroma flag is fetched via
                  * SAMPLE_CBF2 with coordinates masked to the node size;
                  * semantics of SAMPLE_CBF2 are defined elsewhere, confirm. */
1015             int xBase_cb = xBase & ((1 << log2_trafo_size) - 1);
1016             int yBase_cb = yBase & ((1 << log2_trafo_size) - 1);
1017             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1))) =
1018                 SAMPLE_CBF2(lc->tt.cbf_cb[trafo_depth - 1], xBase_cb, yBase_cb + (1 << (log2_trafo_size)));
1019             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1))) =
1020                 SAMPLE_CBF2(lc->tt.cbf_cr[trafo_depth - 1], xBase_cb, yBase_cb + (1 << (log2_trafo_size)));
1021         }
1022     } else {
1023         SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1024         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) = 0;
1025         if (s->sps->chroma_format_idc == 2) {
1026             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1))) =
1027             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1))) = 0;
1028         }
1029     }
1030
         /* Select the intra prediction modes used by the transform units. With
          * an intra split they are picked per sub-block at depth 1 (chroma
          * modes per sub-block only for chroma_format_idc == 3, otherwise entry
          * 0); at other depths the values already stored in lc->tu are kept. */
1031     if (lc->cu.intra_split_flag) {
1032         if (trafo_depth == 1) {
1033             lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[blk_idx];
1034             if (s->sps->chroma_format_idc == 3) {
1035                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1036                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[blk_idx];
1037             } else {
1038                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1039                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1040             }
1041         }
1042     } else {
1043         lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[0];
1044         lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1045         lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1046     }
1047
         /* Default used when cbf_luma is not parsed at the leaf below. */
1048     lc->tt.cbf_luma = 1;
1049
         /* Forces a split at the root of an inter CU that is not 2Nx2N when the
          * SPS allows no inter transform hierarchy depth. */
1050     lc->tt.inter_split_flag = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1051                               lc->cu.pred_mode == MODE_INTER &&
1052                               lc->cu.part_mode != PART_2Nx2N &&
1053                               trafo_depth == 0;
1054
         /* split_transform_flag is read from the bitstream only when both
          * outcomes are possible; otherwise it is inferred: split when the node
          * exceeds the maximum transform size, at the root of an intra-split
          * CU, or when inter_split_flag is set. */
1055     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1056         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1057         trafo_depth     < lc->cu.max_trafo_depth       &&
1058         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1059         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1060     } else {
1061         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1062                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1063                                lc->tt.inter_split_flag;
1064     }
1065
         /* Chroma flags are parsed for nodes larger than 4x4 (or any size with
          * chroma_format_idc == 3), and only at the root or when the parent's
          * flag is set. With chroma_format_idc == 2 a second flag for the lower
          * chroma block follows at leaves and at 8x8 nodes. */
1066     if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
1067         if (trafo_depth == 0 ||
1068             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase)) {
1069             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1070                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1071             if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1072                 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 +  (1  <<  (log2_trafo_size - 1))) =
1073                     ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1074             }
1075         }
1076
1077         if (trafo_depth == 0 ||
1078             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase)) {
1079             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1080                 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1081             if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1082                 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 +  (1  <<  (log2_trafo_size - 1))) =
1083                     ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1084             }
1085         }
1086     }
1087
1088     if (split_transform_flag) {
             /* Recurse into the four quadrants in the order top-left, top-right,
              * bottom-left, bottom-right; this node's (x0, y0) becomes the
              * children's (xBase, yBase). The first error aborts the walk. */
1089         int x1 = x0 + ((1 << log2_trafo_size) >> 1);
1090         int y1 = y0 + ((1 << log2_trafo_size) >> 1);
1091
1092         ret = hls_transform_tree(s, x0, y0, x0, y0, cb_xBase, cb_yBase,
1093                                  log2_cb_size, log2_trafo_size - 1,
1094                                  trafo_depth + 1, 0);
1095         if (ret < 0)
1096             return ret;
1097         ret = hls_transform_tree(s, x1, y0, x0, y0, cb_xBase, cb_yBase,
1098                                  log2_cb_size, log2_trafo_size - 1,
1099                                  trafo_depth + 1, 1);
1100         if (ret < 0)
1101             return ret;
1102         ret = hls_transform_tree(s, x0, y1, x0, y0, cb_xBase, cb_yBase,
1103                                  log2_cb_size, log2_trafo_size - 1,
1104                                  trafo_depth + 1, 2);
1105         if (ret < 0)
1106             return ret;
1107         ret = hls_transform_tree(s, x1, y1, x0, y0, cb_xBase, cb_yBase,
1108                                  log2_cb_size, log2_trafo_size - 1,
1109                                  trafo_depth + 1, 3);
1110         if (ret < 0)
1111             return ret;
1112     } else {
1113         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1114         int log2_min_tu_size = s->sps->log2_min_tb_size;
1115         int min_tu_width     = s->sps->min_tb_width;
1116
             /* cbf_luma is parsed unless this is the root of a non-intra CU with
              * no chroma flag set; in that case the default of 1 set above
              * stays in effect. */
1117         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1118             SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1119             SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) ||
1120             (s->sps->chroma_format_idc == 2 &&
1121              (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 +  (1  <<  (log2_trafo_size - 1))) ||
1122               SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 +  (1  <<  (log2_trafo_size - 1)))))) {
1123             lc->tt.cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1124         }
1125
1126         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1127                                  log2_cb_size, log2_trafo_size, trafo_depth,
1128                                  blk_idx);
1129         if (ret < 0)
1130             return ret;
1131         // TODO: store cbf_luma somewhere else
             /* Record a set cbf_luma for every minimum-size transform block
              * covered by this unit (i walks rows, j walks columns). */
1132         if (lc->tt.cbf_luma) {
1133             int i, j;
1134             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1135                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1136                     int x_tu = (x0 + j) >> log2_min_tu_size;
1137                     int y_tu = (y0 + i) >> log2_min_tu_size;
1138                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1139                 }
1140         }
             /* Unless deblocking is disabled for the slice, compute boundary
              * strengths for this unit and, for transquant-bypass CUs, mark the
              * area through set_deblocking_bypass(). */
1141         if (!s->sh.disable_deblocking_filter_flag) {
1142             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1143             if (s->pps->transquant_bypass_enable_flag &&
1144                 lc->cu.cu_transquant_bypass_flag)
1145                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1146         }
1147     }
1148     return 0;
1149 }
1150
1151 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1152 {
1153     //TODO: non-4:2:0 support
1154     HEVCLocalContext *lc = s->HEVClc;
1155     GetBitContext gb;
1156     int cb_size   = 1 << log2_cb_size;
1157     int stride0   = s->frame->linesize[0];
1158     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1159     int   stride1 = s->frame->linesize[1];
1160     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1161     int   stride2 = s->frame->linesize[2];
1162     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1163
1164     int length         = cb_size * cb_size * s->sps->pcm.bit_depth +
1165                          (((cb_size >> s->sps->hshift[1]) * (cb_size >> s->sps->vshift[1])) +
1166                           ((cb_size >> s->sps->hshift[2]) * (cb_size >> s->sps->vshift[2]))) *
1167                           s->sps->pcm.bit_depth_chroma;
1168     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1169     int ret;
1170
1171     if (!s->sh.disable_deblocking_filter_flag)
1172         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1173
1174     ret = init_get_bits(&gb, pcm, length);
1175     if (ret < 0)
1176         return ret;
1177
1178     s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size,     &gb, s->sps->pcm.bit_depth);
1179     s->hevcdsp.put_pcm(dst1, stride1,
1180                        cb_size >> s->sps->hshift[1],
1181                        cb_size >> s->sps->vshift[1],
1182                        &gb, s->sps->pcm.bit_depth_chroma);
1183     s->hevcdsp.put_pcm(dst2, stride2,
1184                        cb_size >> s->sps->hshift[2],
1185                        cb_size >> s->sps->vshift[2],
1186                        &gb, s->sps->pcm.bit_depth_chroma);
1187     return 0;
1188 }
1189
1190 /**
1191  * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1192  *
1193  * @param s HEVC decoding context
1194  * @param dst target buffer for block data at block position
1195  * @param dststride stride of the dst buffer
1196  * @param ref reference picture buffer at origin (0, 0)
1197  * @param mv motion vector (relative to block position) to get pixel data from
1198  * @param x_off horizontal position of block from origin (0, 0)
1199  * @param y_off vertical position of block from origin (0, 0)
1200  * @param block_w width of block
1201  * @param block_h height of block
1202  * @param luma_weight weighting factor applied to the luma prediction
1203  * @param luma_offset additive offset applied to the luma prediction value
1204  */
1205
1206 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1207                         AVFrame *ref, const Mv *mv, int x_off, int y_off,
1208                         int block_w, int block_h, int luma_weight, int luma_offset)
1209 {
1210     HEVCLocalContext *lc = s->HEVClc;
1211     uint8_t *src         = ref->data[0];
1212     ptrdiff_t srcstride  = ref->linesize[0];
1213     int pic_width        = s->sps->width;
1214     int pic_height       = s->sps->height;
1215     int mx               = mv->x & 3;
1216     int my               = mv->y & 3;
1217     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1218                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1219     int idx              = ff_hevc_pel_weight[block_w];
1220
1221     x_off += mv->x >> 2;
1222     y_off += mv->y >> 2;
1223     src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
1224
1225     if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1226         x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1227         y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1228         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1229         int offset     = QPEL_EXTRA_BEFORE * srcstride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1230         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1231
1232         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1233                                  edge_emu_stride, srcstride,
1234                                  block_w + QPEL_EXTRA,
1235                                  block_h + QPEL_EXTRA,
1236                                  x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1237                                  pic_width, pic_height);
1238         src = lc->edge_emu_buffer + buf_offset;
1239         srcstride = edge_emu_stride;
1240     }
1241
1242     if (!weight_flag)
1243         s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1244                                                       block_h, mx, my, block_w);
1245     else
1246         s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1247                                                         block_h, s->sh.luma_log2_weight_denom,
1248                                                         luma_weight, luma_offset, mx, my, block_w);
1249 }
1250
1251 /**
1252  * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1253  *
1254  * @param s HEVC decoding context
1255  * @param dst target buffer for block data at block position
1256  * @param dststride stride of the dst buffer
1257  * @param ref0 reference picture0 buffer at origin (0, 0)
1258  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1259  * @param x_off horizontal position of block from origin (0, 0)
1260  * @param y_off vertical position of block from origin (0, 0)
1261  * @param block_w width of block
1262  * @param block_h height of block
1263  * @param ref1 reference picture1 buffer at origin (0, 0)
1264  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1265  * @param current_mv current motion vector structure
1266  */
1267  static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1268                        AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1269                        int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1270 {
1271     HEVCLocalContext *lc = s->HEVClc;
1272     DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1273     ptrdiff_t src0stride  = ref0->linesize[0];
1274     ptrdiff_t src1stride  = ref1->linesize[0];
1275     int pic_width        = s->sps->width;
1276     int pic_height       = s->sps->height;
1277     int mx0              = mv0->x & 3;
1278     int my0              = mv0->y & 3;
1279     int mx1              = mv1->x & 3;
1280     int my1              = mv1->y & 3;
1281     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1282                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1283     int x_off0           = x_off + (mv0->x >> 2);
1284     int y_off0           = y_off + (mv0->y >> 2);
1285     int x_off1           = x_off + (mv1->x >> 2);
1286     int y_off1           = y_off + (mv1->y >> 2);
1287     int idx              = ff_hevc_pel_weight[block_w];
1288
1289     uint8_t *src0  = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
1290     uint8_t *src1  = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
1291
1292     if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1293         x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1294         y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1295         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1296         int offset     = QPEL_EXTRA_BEFORE * src0stride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1297         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1298
1299         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1300                                  edge_emu_stride, src0stride,
1301                                  block_w + QPEL_EXTRA,
1302                                  block_h + QPEL_EXTRA,
1303                                  x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1304                                  pic_width, pic_height);
1305         src0 = lc->edge_emu_buffer + buf_offset;
1306         src0stride = edge_emu_stride;
1307     }
1308
1309     if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1310         x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1311         y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1312         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1313         int offset     = QPEL_EXTRA_BEFORE * src1stride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1314         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1315
1316         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1317                                  edge_emu_stride, src1stride,
1318                                  block_w + QPEL_EXTRA,
1319                                  block_h + QPEL_EXTRA,
1320                                  x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1321                                  pic_width, pic_height);
1322         src1 = lc->edge_emu_buffer2 + buf_offset;
1323         src1stride = edge_emu_stride;
1324     }
1325
1326     s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](tmp, MAX_PB_SIZE, src0, src0stride,
1327                                                 block_h, mx0, my0, block_w);
1328     if (!weight_flag)
1329         s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, tmp, MAX_PB_SIZE,
1330                                                        block_h, mx1, my1, block_w);
1331     else
1332         s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, tmp, MAX_PB_SIZE,
1333                                                          block_h, s->sh.luma_log2_weight_denom,
1334                                                          s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1335                                                          s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1336                                                          s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1337                                                          s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1338                                                          mx1, my1, block_w);
1339
1340 }
1341
1342 /**
1343  * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1344  *
1345  * @param s HEVC decoding context
1346  * @param dst1 target buffer for block data at block position (U plane)
1347  * @param dst2 target buffer for block data at block position (V plane)
1348  * @param dststride stride of the dst1 and dst2 buffers
1349  * @param ref reference picture buffer at origin (0, 0)
1350  * @param mv motion vector (relative to block position) to get pixel data from
1351  * @param x_off horizontal position of block from origin (0, 0)
1352  * @param y_off vertical position of block from origin (0, 0)
1353  * @param block_w width of block
1354  * @param block_h height of block
1355  * @param chroma_weight weighting factor applied to the chroma prediction
1356  * @param chroma_offset additive offset applied to the chroma prediction value
1357  */
1358
1359 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1360                           ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1361                           int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1362 {
1363     HEVCLocalContext *lc = s->HEVClc;
1364     int pic_width        = s->sps->width >> s->sps->hshift[1];
1365     int pic_height       = s->sps->height >> s->sps->vshift[1];
1366     const Mv *mv         = &current_mv->mv[reflist];
1367     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1368                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1369     int idx              = ff_hevc_pel_weight[block_w];
1370     int hshift           = s->sps->hshift[1];
1371     int vshift           = s->sps->vshift[1];
1372     intptr_t mx          = mv->x & ((1 << (2 + hshift)) - 1);
1373     intptr_t my          = mv->y & ((1 << (2 + vshift)) - 1);
1374     intptr_t _mx         = mx << (1 - hshift);
1375     intptr_t _my         = my << (1 - vshift);
1376
1377     x_off += mv->x >> (2 + hshift);
1378     y_off += mv->y >> (2 + vshift);
1379     src0  += y_off * srcstride + (x_off << s->sps->pixel_shift);
1380
1381     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1382         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1383         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1384         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1385         int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->sps->pixel_shift));
1386         int buf_offset0 = EPEL_EXTRA_BEFORE *
1387                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1388         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1389                                  edge_emu_stride, srcstride,
1390                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1391                                  x_off - EPEL_EXTRA_BEFORE,
1392                                  y_off - EPEL_EXTRA_BEFORE,
1393                                  pic_width, pic_height);
1394
1395         src0 = lc->edge_emu_buffer + buf_offset0;
1396         srcstride = edge_emu_stride;
1397     }
1398     if (!weight_flag)
1399         s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1400                                                   block_h, _mx, _my, block_w);
1401     else
1402         s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1403                                                         block_h, s->sh.chroma_log2_weight_denom,
1404                                                         chroma_weight, chroma_offset, _mx, _my, block_w);
1405 }
1406
1407 /**
1408  * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1409  *
1410  * @param s HEVC decoding context
1411  * @param dst target buffer for block data at block position
1412  * @param dststride stride of the dst buffer
1413  * @param ref0 reference picture0 buffer at origin (0, 0)
1414  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1415  * @param x_off horizontal position of block from origin (0, 0)
1416  * @param y_off vertical position of block from origin (0, 0)
1417  * @param block_w width of block
1418  * @param block_h height of block
1419  * @param ref1 reference picture1 buffer at origin (0, 0)
1420  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1421  * @param current_mv current motion vector structure
1422  * @param cidx chroma component(cb, cr)
1423  */
1424 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1425                          int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1426 {
1427     DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1428     int tmpstride = MAX_PB_SIZE;
1429     HEVCLocalContext *lc = s->HEVClc;
1430     uint8_t *src1        = ref0->data[cidx+1];
1431     uint8_t *src2        = ref1->data[cidx+1];
1432     ptrdiff_t src1stride = ref0->linesize[cidx+1];
1433     ptrdiff_t src2stride = ref1->linesize[cidx+1];
1434     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1435                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1436     int pic_width        = s->sps->width >> s->sps->hshift[1];
1437     int pic_height       = s->sps->height >> s->sps->vshift[1];
1438     Mv *mv0              = &current_mv->mv[0];
1439     Mv *mv1              = &current_mv->mv[1];
1440     int hshift = s->sps->hshift[1];
1441     int vshift = s->sps->vshift[1];
1442
1443     intptr_t mx0 = mv0->x & ((1 << (2 + hshift)) - 1);
1444     intptr_t my0 = mv0->y & ((1 << (2 + vshift)) - 1);
1445     intptr_t mx1 = mv1->x & ((1 << (2 + hshift)) - 1);
1446     intptr_t my1 = mv1->y & ((1 << (2 + vshift)) - 1);
1447     intptr_t _mx0 = mx0 << (1 - hshift);
1448     intptr_t _my0 = my0 << (1 - vshift);
1449     intptr_t _mx1 = mx1 << (1 - hshift);
1450     intptr_t _my1 = my1 << (1 - vshift);
1451
1452     int x_off0 = x_off + (mv0->x >> (2 + hshift));
1453     int y_off0 = y_off + (mv0->y >> (2 + vshift));
1454     int x_off1 = x_off + (mv1->x >> (2 + hshift));
1455     int y_off1 = y_off + (mv1->y >> (2 + vshift));
1456     int idx = ff_hevc_pel_weight[block_w];
1457     src1  += y_off0 * src1stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
1458     src2  += y_off1 * src2stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
1459
1460     if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1461         x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1462         y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1463         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1464         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1465         int buf_offset1 = EPEL_EXTRA_BEFORE *
1466                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1467
1468         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1469                                  edge_emu_stride, src1stride,
1470                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1471                                  x_off0 - EPEL_EXTRA_BEFORE,
1472                                  y_off0 - EPEL_EXTRA_BEFORE,
1473                                  pic_width, pic_height);
1474
1475         src1 = lc->edge_emu_buffer + buf_offset1;
1476         src1stride = edge_emu_stride;
1477     }
1478
1479     if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1480         x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1481         y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1482         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1483         int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1484         int buf_offset1 = EPEL_EXTRA_BEFORE *
1485                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1486
1487         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1488                                  edge_emu_stride, src2stride,
1489                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1490                                  x_off1 - EPEL_EXTRA_BEFORE,
1491                                  y_off1 - EPEL_EXTRA_BEFORE,
1492                                  pic_width, pic_height);
1493
1494         src2 = lc->edge_emu_buffer2 + buf_offset1;
1495         src2stride = edge_emu_stride;
1496     }
1497
1498     s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](tmp, tmpstride, src1, src1stride,
1499                                                 block_h, _mx0, _my0, block_w);
1500     if (!weight_flag)
1501         s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1502                                                        src2, src2stride, tmp, tmpstride,
1503                                                        block_h, _mx1, _my1, block_w);
1504     else
1505         s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1506                                                          src2, src2stride, tmp, tmpstride,
1507                                                          block_h,
1508                                                          s->sh.chroma_log2_weight_denom,
1509                                                          s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1510                                                          s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1511                                                          s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1512                                                          s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1513                                                          _mx1, _my1, block_w);
1514 }
1515
1516 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1517                                 const Mv *mv, int y0, int height)
1518 {
1519     int y = (mv->y >> 2) + y0 + height + 9;
1520
1521     if (s->threads_type == FF_THREAD_FRAME )
1522         ff_thread_await_progress(&ref->tf, y, 0);
1523 }
1524
1525 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1526                                 int nPbW, int nPbH,
1527                                 int log2_cb_size, int partIdx, int idx)
1528 {
1529 #define POS(c_idx, x, y)                                                              \
1530     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1531                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1532     HEVCLocalContext *lc = s->HEVClc;
1533     int merge_idx = 0;
1534     struct MvField current_mv = {{{ 0 }}};
1535
1536     int min_pu_width = s->sps->min_pu_width;
1537
1538     MvField *tab_mvf = s->ref->tab_mvf;
1539     RefPicList  *refPicList = s->ref->refPicList;
1540     HEVCFrame *ref0, *ref1;
1541     uint8_t *dst0 = POS(0, x0, y0);
1542     uint8_t *dst1 = POS(1, x0, y0);
1543     uint8_t *dst2 = POS(2, x0, y0);
1544     int log2_min_cb_size = s->sps->log2_min_cb_size;
1545     int min_cb_width     = s->sps->min_cb_width;
1546     int x_cb             = x0 >> log2_min_cb_size;
1547     int y_cb             = y0 >> log2_min_cb_size;
1548     int ref_idx[2];
1549     int mvp_flag[2];
1550     int x_pu, y_pu;
1551     int i, j;
1552
1553     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1554         if (s->sh.max_num_merge_cand > 1)
1555             merge_idx = ff_hevc_merge_idx_decode(s);
1556         else
1557             merge_idx = 0;
1558
1559         ff_hevc_luma_mv_merge_mode(s, x0, y0,
1560                                    1 << log2_cb_size,
1561                                    1 << log2_cb_size,
1562                                    log2_cb_size, partIdx,
1563                                    merge_idx, &current_mv);
1564         x_pu = x0 >> s->sps->log2_min_pu_size;
1565         y_pu = y0 >> s->sps->log2_min_pu_size;
1566
1567         for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1568             for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1569                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1570     } else { /* MODE_INTER */
1571         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1572         if (lc->pu.merge_flag) {
1573             if (s->sh.max_num_merge_cand > 1)
1574                 merge_idx = ff_hevc_merge_idx_decode(s);
1575             else
1576                 merge_idx = 0;
1577
1578             ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1579                                        partIdx, merge_idx, &current_mv);
1580             x_pu = x0 >> s->sps->log2_min_pu_size;
1581             y_pu = y0 >> s->sps->log2_min_pu_size;
1582
1583             for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1584                 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1585                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1586         } else {
1587             enum InterPredIdc inter_pred_idc = PRED_L0;
1588             ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1589             current_mv.pred_flag = 0;
1590             if (s->sh.slice_type == B_SLICE)
1591                 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1592
1593             if (inter_pred_idc != PRED_L1) {
1594                 if (s->sh.nb_refs[L0]) {
1595                     ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1596                     current_mv.ref_idx[0] = ref_idx[0];
1597                 }
1598                 current_mv.pred_flag = PF_L0;
1599                 ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1600                 mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
1601                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1602                                          partIdx, merge_idx, &current_mv,
1603                                          mvp_flag[0], 0);
1604                 current_mv.mv[0].x += lc->pu.mvd.x;
1605                 current_mv.mv[0].y += lc->pu.mvd.y;
1606             }
1607
1608             if (inter_pred_idc != PRED_L0) {
1609                 if (s->sh.nb_refs[L1]) {
1610                     ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1611                     current_mv.ref_idx[1] = ref_idx[1];
1612                 }
1613
1614                 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1615                     lc->pu.mvd.x = 0;
1616                     lc->pu.mvd.y = 0;
1617                 } else {
1618                     ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1619                 }
1620
1621                 current_mv.pred_flag += PF_L1;
1622                 mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
1623                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1624                                          partIdx, merge_idx, &current_mv,
1625                                          mvp_flag[1], 1);
1626                 current_mv.mv[1].x += lc->pu.mvd.x;
1627                 current_mv.mv[1].y += lc->pu.mvd.y;
1628             }
1629
1630             x_pu = x0 >> s->sps->log2_min_pu_size;
1631             y_pu = y0 >> s->sps->log2_min_pu_size;
1632
1633             for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1634                 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1635                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1636         }
1637     }
1638
1639     if (current_mv.pred_flag & PF_L0) {
1640         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1641         if (!ref0)
1642             return;
1643         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1644     }
1645     if (current_mv.pred_flag & PF_L1) {
1646         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1647         if (!ref1)
1648             return;
1649         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1650     }
1651
1652     if (current_mv.pred_flag == PF_L0) {
1653         int x0_c = x0 >> s->sps->hshift[1];
1654         int y0_c = y0 >> s->sps->vshift[1];
1655         int nPbW_c = nPbW >> s->sps->hshift[1];
1656         int nPbH_c = nPbH >> s->sps->vshift[1];
1657
1658         luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1659                     &current_mv.mv[0], x0, y0, nPbW, nPbH,
1660                     s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1661                     s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1662
1663         chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1664                       0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1665                       s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1666         chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1667                       0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1668                       s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1669     } else if (current_mv.pred_flag == PF_L1) {
1670         int x0_c = x0 >> s->sps->hshift[1];
1671         int y0_c = y0 >> s->sps->vshift[1];
1672         int nPbW_c = nPbW >> s->sps->hshift[1];
1673         int nPbH_c = nPbH >> s->sps->vshift[1];
1674
1675         luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1676                     &current_mv.mv[1], x0, y0, nPbW, nPbH,
1677                     s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1678                     s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1679
1680         chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1681                       1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1682                       s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1683
1684         chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1685                       1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1686                       s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1687     } else if (current_mv.pred_flag == PF_BI) {
1688         int x0_c = x0 >> s->sps->hshift[1];
1689         int y0_c = y0 >> s->sps->vshift[1];
1690         int nPbW_c = nPbW >> s->sps->hshift[1];
1691         int nPbH_c = nPbH >> s->sps->vshift[1];
1692
1693         luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1694                    &current_mv.mv[0], x0, y0, nPbW, nPbH,
1695                    ref1->frame, &current_mv.mv[1], &current_mv);
1696
1697         chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1698                      x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0);
1699
1700         chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1701                      x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 1);
1702     }
1703 }
1704
1705 /**
1706  * 8.4.1
1707  */
1708 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1709                                 int prev_intra_luma_pred_flag)
1710 {
1711     HEVCLocalContext *lc = s->HEVClc;
1712     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1713     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1714     int min_pu_width     = s->sps->min_pu_width;
1715     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1716     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1717     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1718
1719     int cand_up   = (lc->ctb_up_flag || y0b) ?
1720                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1721     int cand_left = (lc->ctb_left_flag || x0b) ?
1722                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1723
1724     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1725
1726     MvField *tab_mvf = s->ref->tab_mvf;
1727     int intra_pred_mode;
1728     int candidate[3];
1729     int i, j;
1730
1731     // intra_pred_mode prediction does not cross vertical CTB boundaries
1732     if ((y0 - 1) < y_ctb)
1733         cand_up = INTRA_DC;
1734
1735     if (cand_left == cand_up) {
1736         if (cand_left < 2) {
1737             candidate[0] = INTRA_PLANAR;
1738             candidate[1] = INTRA_DC;
1739             candidate[2] = INTRA_ANGULAR_26;
1740         } else {
1741             candidate[0] = cand_left;
1742             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1743             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1744         }
1745     } else {
1746         candidate[0] = cand_left;
1747         candidate[1] = cand_up;
1748         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1749             candidate[2] = INTRA_PLANAR;
1750         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1751             candidate[2] = INTRA_DC;
1752         } else {
1753             candidate[2] = INTRA_ANGULAR_26;
1754         }
1755     }
1756
1757     if (prev_intra_luma_pred_flag) {
1758         intra_pred_mode = candidate[lc->pu.mpm_idx];
1759     } else {
1760         if (candidate[0] > candidate[1])
1761             FFSWAP(uint8_t, candidate[0], candidate[1]);
1762         if (candidate[0] > candidate[2])
1763             FFSWAP(uint8_t, candidate[0], candidate[2]);
1764         if (candidate[1] > candidate[2])
1765             FFSWAP(uint8_t, candidate[1], candidate[2]);
1766
1767         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1768         for (i = 0; i < 3; i++)
1769             if (intra_pred_mode >= candidate[i])
1770                 intra_pred_mode++;
1771     }
1772
1773     /* write the intra prediction units into the mv array */
1774     if (!size_in_pus)
1775         size_in_pus = 1;
1776     for (i = 0; i < size_in_pus; i++) {
1777         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1778                intra_pred_mode, size_in_pus);
1779
1780         for (j = 0; j < size_in_pus; j++) {
1781             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
1782         }
1783     }
1784
1785     return intra_pred_mode;
1786 }
1787
1788 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1789                                           int log2_cb_size, int ct_depth)
1790 {
1791     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1792     int x_cb   = x0 >> s->sps->log2_min_cb_size;
1793     int y_cb   = y0 >> s->sps->log2_min_cb_size;
1794     int y;
1795
1796     for (y = 0; y < length; y++)
1797         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
1798                ct_depth, length);
1799 }
1800
/*
 * Remapping table for intra prediction modes, indexed by a mode in 0..34.
 * intra_prediction_unit() passes the derived chroma mode through it when
 * chroma_format_idc == 2.
 */
static const uint8_t tab_mode_idx[] = {
    /*  0.. 6 */  0,  1,  2,  2,  2,  2,  3,
    /*  7..13 */  5,  7,  8, 10, 12, 13, 15,
    /* 14..20 */ 17, 18, 19, 20, 21, 22, 23,
    /* 21..27 */ 23, 24, 24, 25, 25, 26, 27,
    /* 28..34 */ 27, 28, 28, 29, 29, 30, 31,
};
1804
1805 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1806                                   int log2_cb_size)
1807 {
1808     HEVCLocalContext *lc = s->HEVClc;
1809     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1810     uint8_t prev_intra_luma_pred_flag[4];
1811     int split   = lc->cu.part_mode == PART_NxN;
1812     int pb_size = (1 << log2_cb_size) >> split;
1813     int side    = split + 1;
1814     int chroma_mode;
1815     int i, j;
1816
1817     for (i = 0; i < side; i++)
1818         for (j = 0; j < side; j++)
1819             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1820
1821     for (i = 0; i < side; i++) {
1822         for (j = 0; j < side; j++) {
1823             if (prev_intra_luma_pred_flag[2 * i + j])
1824                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1825             else
1826                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1827
1828             lc->pu.intra_pred_mode[2 * i + j] =
1829                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1830                                      prev_intra_luma_pred_flag[2 * i + j]);
1831         }
1832     }
1833
1834     if (s->sps->chroma_format_idc == 3) {
1835         for (i = 0; i < side; i++) {
1836             for (j = 0; j < side; j++) {
1837                 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1838                 if (chroma_mode != 4) {
1839                     if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
1840                         lc->pu.intra_pred_mode_c[2 * i + j] = 34;
1841                     else
1842                         lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
1843                 } else {
1844                     lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
1845                 }
1846             }
1847         }
1848     } else if (s->sps->chroma_format_idc == 2) {
1849         int mode_idx;
1850         lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1851         if (chroma_mode != 4) {
1852             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1853                 mode_idx = 34;
1854             else
1855                 mode_idx = intra_chroma_table[chroma_mode];
1856         } else {
1857             mode_idx = lc->pu.intra_pred_mode[0];
1858         }
1859         lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
1860     } else if (s->sps->chroma_format_idc != 0) {
1861         chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1862         if (chroma_mode != 4) {
1863             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1864                 lc->pu.intra_pred_mode_c[0] = 34;
1865             else
1866                 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
1867         } else {
1868             lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
1869         }
1870     }
1871 }
1872
1873 static void intra_prediction_unit_default_value(HEVCContext *s,
1874                                                 int x0, int y0,
1875                                                 int log2_cb_size)
1876 {
1877     HEVCLocalContext *lc = s->HEVClc;
1878     int pb_size          = 1 << log2_cb_size;
1879     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
1880     int min_pu_width     = s->sps->min_pu_width;
1881     MvField *tab_mvf     = s->ref->tab_mvf;
1882     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1883     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1884     int j, k;
1885
1886     if (size_in_pus == 0)
1887         size_in_pus = 1;
1888     for (j = 0; j < size_in_pus; j++)
1889         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
1890     if (lc->cu.pred_mode == MODE_INTRA)
1891         for (j = 0; j < size_in_pus; j++)
1892             for (k = 0; k < size_in_pus; k++)
1893                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
1894 }
1895
1896 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
1897 {
1898     int cb_size          = 1 << log2_cb_size;
1899     HEVCLocalContext *lc = s->HEVClc;
1900     int log2_min_cb_size = s->sps->log2_min_cb_size;
1901     int length           = cb_size >> log2_min_cb_size;
1902     int min_cb_width     = s->sps->min_cb_width;
1903     int x_cb             = x0 >> log2_min_cb_size;
1904     int y_cb             = y0 >> log2_min_cb_size;
1905     int idx              = log2_cb_size - 2;
1906     int qp_block_mask    = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
1907     int x, y, ret;
1908
1909     lc->cu.x                = x0;
1910     lc->cu.y                = y0;
1911     lc->cu.rqt_root_cbf     = 1;
1912     lc->cu.pred_mode        = MODE_INTRA;
1913     lc->cu.part_mode        = PART_2Nx2N;
1914     lc->cu.intra_split_flag = 0;
1915     lc->cu.pcm_flag         = 0;
1916
1917     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
1918     for (x = 0; x < 4; x++)
1919         lc->pu.intra_pred_mode[x] = 1;
1920     if (s->pps->transquant_bypass_enable_flag) {
1921         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
1922         if (lc->cu.cu_transquant_bypass_flag)
1923             set_deblocking_bypass(s, x0, y0, log2_cb_size);
1924     } else
1925         lc->cu.cu_transquant_bypass_flag = 0;
1926
1927     if (s->sh.slice_type != I_SLICE) {
1928         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
1929
1930         x = y_cb * min_cb_width + x_cb;
1931         for (y = 0; y < length; y++) {
1932             memset(&s->skip_flag[x], skip_flag, length);
1933             x += min_cb_width;
1934         }
1935         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
1936     }
1937
1938     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1939         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
1940         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
1941
1942         if (!s->sh.disable_deblocking_filter_flag)
1943             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1944     } else {
1945         if (s->sh.slice_type != I_SLICE)
1946             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
1947         if (lc->cu.pred_mode != MODE_INTRA ||
1948             log2_cb_size == s->sps->log2_min_cb_size) {
1949             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
1950             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
1951                                       lc->cu.pred_mode == MODE_INTRA;
1952         }
1953
1954         if (lc->cu.pred_mode == MODE_INTRA) {
1955             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
1956                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
1957                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
1958                 lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s);
1959             }
1960             if (lc->cu.pcm_flag) {
1961                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
1962                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
1963                 if (s->sps->pcm.loop_filter_disable_flag)
1964                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
1965
1966                 if (ret < 0)
1967                     return ret;
1968             } else {
1969                 intra_prediction_unit(s, x0, y0, log2_cb_size);
1970             }
1971         } else {
1972             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
1973             switch (lc->cu.part_mode) {
1974             case PART_2Nx2N:
1975                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
1976                 break;
1977             case PART_2NxN:
1978                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0, idx);
1979                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
1980                 break;
1981             case PART_Nx2N:
1982                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
1983                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
1984                 break;
1985             case PART_2NxnU:
1986                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0, idx);
1987                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
1988                 break;
1989             case PART_2NxnD:
1990                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
1991                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1, idx);
1992                 break;
1993             case PART_nLx2N:
1994                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0, idx - 2);
1995                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
1996                 break;
1997             case PART_nRx2N:
1998                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
1999                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1, idx - 2);
2000                 break;
2001             case PART_NxN:
2002                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2003                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2004                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2005                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
2006                 break;
2007             }
2008         }
2009
2010         if (!lc->cu.pcm_flag) {
2011             if (lc->cu.pred_mode != MODE_INTRA &&
2012                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2013                 lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2014             }
2015             if (lc->cu.rqt_root_cbf) {
2016                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2017                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2018                                          s->sps->max_transform_hierarchy_depth_inter;
2019                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2020                                          log2_cb_size,
2021                                          log2_cb_size, 0, 0);
2022                 if (ret < 0)
2023                     return ret;
2024             } else {
2025                 if (!s->sh.disable_deblocking_filter_flag)
2026                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2027             }
2028         }
2029     }
2030
2031     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2032         ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
2033
2034     x = y_cb * min_cb_width + x_cb;
2035     for (y = 0; y < length; y++) {
2036         memset(&s->qp_y_tab[x], lc->qp_y, length);
2037         x += min_cb_width;
2038     }
2039
2040     if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2041        ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2042         lc->qPy_pred = lc->qp_y;
2043     }
2044
2045     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
2046
2047     return 0;
2048 }
2049
2050 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2051                                int log2_cb_size, int cb_depth)
2052 {
2053     HEVCLocalContext *lc = s->HEVClc;
2054     const int cb_size    = 1 << log2_cb_size;
2055     int ret;
2056     int qp_block_mask = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
2057     int split_cu_flag;
2058
2059     lc->ct.depth = cb_depth;
2060     if (x0 + cb_size <= s->sps->width  &&
2061         y0 + cb_size <= s->sps->height &&
2062         log2_cb_size > s->sps->log2_min_cb_size) {
2063         split_cu_flag = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2064     } else {
2065         split_cu_flag = (log2_cb_size > s->sps->log2_min_cb_size);
2066     }
2067     if (s->pps->cu_qp_delta_enabled_flag &&
2068         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2069         lc->tu.is_cu_qp_delta_coded = 0;
2070         lc->tu.cu_qp_delta          = 0;
2071     }
2072
2073     if (split_cu_flag) {
2074         const int cb_size_split = cb_size >> 1;
2075         const int x1 = x0 + cb_size_split;
2076         const int y1 = y0 + cb_size_split;
2077
2078         int more_data = 0;
2079
2080         more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2081         if (more_data < 0)
2082             return more_data;
2083
2084         if (more_data && x1 < s->sps->width) {
2085             more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2086             if (more_data < 0)
2087                 return more_data;
2088         }
2089         if (more_data && y1 < s->sps->height) {
2090             more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2091             if (more_data < 0)
2092                 return more_data;
2093         }
2094         if (more_data && x1 < s->sps->width &&
2095             y1 < s->sps->height) {
2096             more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
2097             if (more_data < 0)
2098                 return more_data;
2099         }
2100
2101         if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2102             ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2103             lc->qPy_pred = lc->qp_y;
2104
2105         if (more_data)
2106             return ((x1 + cb_size_split) < s->sps->width ||
2107                     (y1 + cb_size_split) < s->sps->height);
2108         else
2109             return 0;
2110     } else {
2111         ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2112         if (ret < 0)
2113             return ret;
2114         if ((!((x0 + cb_size) %
2115                (1 << (s->sps->log2_ctb_size))) ||
2116              (x0 + cb_size >= s->sps->width)) &&
2117             (!((y0 + cb_size) %
2118                (1 << (s->sps->log2_ctb_size))) ||
2119              (y0 + cb_size >= s->sps->height))) {
2120             int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2121             return !end_of_slice_flag;
2122         } else {
2123             return 1;
2124         }
2125     }
2126
2127     return 0;
2128 }
2129
2130 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2131                                  int ctb_addr_ts)
2132 {
2133     HEVCLocalContext *lc  = s->HEVClc;
2134     int ctb_size          = 1 << s->sps->log2_ctb_size;
2135     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2136     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2137
2138     int tile_left_boundary, tile_up_boundary;
2139     int slice_left_boundary, slice_up_boundary;
2140
2141     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2142
2143     if (s->pps->entropy_coding_sync_enabled_flag) {
2144         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2145             lc->first_qp_group = 1;
2146         lc->end_of_tiles_x = s->sps->width;
2147     } else if (s->pps->tiles_enabled_flag) {
2148         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2149             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2150             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2151             lc->first_qp_group   = 1;
2152         }
2153     } else {
2154         lc->end_of_tiles_x = s->sps->width;
2155     }
2156
2157     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2158
2159     if (s->pps->tiles_enabled_flag) {
2160         tile_left_boundary = x_ctb > 0 &&
2161                              s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1]];
2162         slice_left_boundary = x_ctb > 0 &&
2163                               s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1];
2164         tile_up_boundary  = y_ctb > 0 &&
2165                             s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
2166         slice_up_boundary = y_ctb > 0 &&
2167                             s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width];
2168     } else {
2169         tile_left_boundary =
2170         tile_up_boundary   = 0;
2171         slice_left_boundary = ctb_addr_in_slice <= 0;
2172         slice_up_boundary   = ctb_addr_in_slice < s->sps->ctb_width;
2173     }
2174     lc->slice_or_tiles_left_boundary = slice_left_boundary + (tile_left_boundary << 1);
2175     lc->slice_or_tiles_up_boundary   = slice_up_boundary   + (tile_up_boundary   << 1);
2176     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0)                  && !tile_left_boundary);
2177     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && !tile_up_boundary);
2178     lc->ctb_up_right_flag = ((y_ctb > 0)                 && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2179     lc->ctb_up_left_flag  = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2180 }
2181
2182 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2183 {
2184     HEVCContext *s  = avctxt->priv_data;
2185     int ctb_size    = 1 << s->sps->log2_ctb_size;
2186     int more_data   = 1;
2187     int x_ctb       = 0;
2188     int y_ctb       = 0;
2189     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2190
2191     if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2192         av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2193         return AVERROR_INVALIDDATA;
2194     }
2195
2196     if (s->sh.dependent_slice_segment_flag) {
2197         int prev_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2198         if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2199             av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2200             return AVERROR_INVALIDDATA;
2201         }
2202     }
2203
2204     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2205         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2206
2207         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2208         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2209         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2210
2211         ff_hevc_cabac_init(s, ctb_addr_ts);
2212
2213         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2214
2215         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2216         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2217         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2218
2219         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2220         if (more_data < 0) {
2221             s->tab_slice_address[ctb_addr_rs] = -1;
2222             return more_data;
2223         }
2224
2225
2226         ctb_addr_ts++;
2227         ff_hevc_save_states(s, ctb_addr_ts);
2228         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2229     }
2230
2231     if (x_ctb + ctb_size >= s->sps->width &&
2232         y_ctb + ctb_size >= s->sps->height)
2233         ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2234
2235     return ctb_addr_ts;
2236 }
2237
2238 static int hls_slice_data(HEVCContext *s)
2239 {
2240     int arg[2];
2241     int ret[2];
2242
2243     arg[0] = 0;
2244     arg[1] = 1;
2245
2246     s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
2247     return ret[0];
2248 }
2249 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2250 {
2251     HEVCContext *s1  = avctxt->priv_data, *s;
2252     HEVCLocalContext *lc;
2253     int ctb_size    = 1<< s1->sps->log2_ctb_size;
2254     int more_data   = 1;
2255     int *ctb_row_p    = input_ctb_row;
2256     int ctb_row = ctb_row_p[job];
2257     int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->sps->width + ctb_size - 1) >> s1->sps->log2_ctb_size);
2258     int ctb_addr_ts = s1->pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2259     int thread = ctb_row % s1->threads_number;
2260     int ret;
2261
2262     s = s1->sList[self_id];
2263     lc = s->HEVClc;
2264
2265     if(ctb_row) {
2266         ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2267
2268         if (ret < 0)
2269             return ret;
2270         ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2271     }
2272
2273     while(more_data && ctb_addr_ts < s->sps->ctb_size) {
2274         int x_ctb = (ctb_addr_rs % s->sps->ctb_width) << s->sps->log2_ctb_size;
2275         int y_ctb = (ctb_addr_rs / s->sps->ctb_width) << s->sps->log2_ctb_size;
2276
2277         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2278
2279         ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
2280
2281         if (avpriv_atomic_int_get(&s1->wpp_err)){
2282             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2283             return 0;
2284         }
2285
2286         ff_hevc_cabac_init(s, ctb_addr_ts);
2287         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2288         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2289
2290         if (more_data < 0) {
2291             s->tab_slice_address[ctb_addr_rs] = -1;
2292             return more_data;
2293         }
2294
2295         ctb_addr_ts++;
2296
2297         ff_hevc_save_states(s, ctb_addr_ts);
2298         ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2299         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2300
2301         if (!more_data && (x_ctb+ctb_size) < s->sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2302             avpriv_atomic_int_set(&s1->wpp_err,  1);
2303             ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2304             return 0;
2305         }
2306
2307         if ((x_ctb+ctb_size) >= s->sps->width && (y_ctb+ctb_size) >= s->sps->height ) {
2308             ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2309             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2310             return ctb_addr_ts;
2311         }
2312         ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2313         x_ctb+=ctb_size;
2314
2315         if(x_ctb >= s->sps->width) {
2316             break;
2317         }
2318     }
2319     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2320
2321     return 0;
2322 }
2323
2324 static int hls_slice_data_wpp(HEVCContext *s, const uint8_t *nal, int length)
2325 {
2326     HEVCLocalContext *lc = s->HEVClc;
2327     int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2328     int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2329     int offset;
2330     int startheader, cmpt = 0;
2331     int i, j, res = 0;
2332
2333
2334     if (!s->sList[1]) {
2335         ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
2336
2337
2338         for (i = 1; i < s->threads_number; i++) {
2339             s->sList[i] = av_malloc(sizeof(HEVCContext));
2340             memcpy(s->sList[i], s, sizeof(HEVCContext));
2341             s->HEVClcList[i] = av_malloc(sizeof(HEVCLocalContext));
2342             s->sList[i]->HEVClc = s->HEVClcList[i];
2343         }
2344     }
2345
2346     offset = (lc->gb.index >> 3);
2347
2348     for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < s->skipped_bytes; j++) {
2349         if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2350             startheader--;
2351             cmpt++;
2352         }
2353     }
2354
2355     for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2356         offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2357         for (j = 0, cmpt = 0, startheader = offset
2358              + s->sh.entry_point_offset[i]; j < s->skipped_bytes; j++) {
2359             if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2360                 startheader--;
2361                 cmpt++;
2362             }
2363         }
2364         s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2365         s->sh.offset[i - 1] = offset;
2366
2367     }
2368     if (s->sh.num_entry_point_offsets != 0) {
2369         offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2370         s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2371         s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
2372
2373     }
2374     s->data = nal;
2375
2376     for (i = 1; i < s->threads_number; i++) {
2377         s->sList[i]->HEVClc->first_qp_group = 1;
2378         s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2379         memcpy(s->sList[i], s, sizeof(HEVCContext));
2380         s->sList[i]->HEVClc = s->HEVClcList[i];
2381     }
2382
2383     avpriv_atomic_int_set(&s->wpp_err, 0);
2384     ff_reset_entries(s->avctx);
2385
2386     for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2387         arg[i] = i;
2388         ret[i] = 0;
2389     }
2390
2391     if (s->pps->entropy_coding_sync_enabled_flag)
2392         s->avctx->execute2(s->avctx, (void *) hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2393
2394     for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
2395         res += ret[i];
2396     av_free(ret);
2397     av_free(arg);
2398     return res;
2399 }
2400
2401 /**
2402  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2403  * 0 if the unit should be skipped, 1 otherwise
2404  */
2405 static int hls_nal_unit(HEVCContext *s)
2406 {
2407     GetBitContext *gb = &s->HEVClc->gb;
2408     int nuh_layer_id;
2409
2410     if (get_bits1(gb) != 0)
2411         return AVERROR_INVALIDDATA;
2412
2413     s->nal_unit_type = get_bits(gb, 6);
2414
2415     nuh_layer_id   = get_bits(gb, 6);
2416     s->temporal_id = get_bits(gb, 3) - 1;
2417     if (s->temporal_id < 0)
2418         return AVERROR_INVALIDDATA;
2419
2420     av_log(s->avctx, AV_LOG_DEBUG,
2421            "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2422            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2423
2424     return nuh_layer_id == 0;
2425 }
2426
2427 static int set_side_data(HEVCContext *s)
2428 {
2429     AVFrame *out = s->ref->frame;
2430
2431     if (s->sei_frame_packing_present &&
2432         s->frame_packing_arrangement_type >= 3 &&
2433         s->frame_packing_arrangement_type <= 5 &&
2434         s->content_interpretation_type > 0 &&
2435         s->content_interpretation_type < 3) {
2436         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2437         if (!stereo)
2438             return AVERROR(ENOMEM);
2439
2440         switch (s->frame_packing_arrangement_type) {
2441         case 3:
2442             if (s->quincunx_subsampling)
2443                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2444             else
2445                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2446             break;
2447         case 4:
2448             stereo->type = AV_STEREO3D_TOPBOTTOM;
2449             break;
2450         case 5:
2451             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2452             break;
2453         }
2454
2455         if (s->content_interpretation_type == 2)
2456             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2457     }
2458
2459     if (s->sei_display_orientation_present &&
2460         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2461         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2462         AVFrameSideData *rotation = av_frame_new_side_data(out,
2463                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2464                                                            sizeof(int32_t) * 9);
2465         if (!rotation)
2466             return AVERROR(ENOMEM);
2467
2468         av_display_rotation_set((int32_t *)rotation->data, angle);
2469         av_display_matrix_flip((int32_t *)rotation->data,
2470                                s->sei_vflip, s->sei_hflip);
2471     }
2472
2473     return 0;
2474 }
2475
2476 static int hevc_frame_start(HEVCContext *s)
2477 {
2478     HEVCLocalContext *lc = s->HEVClc;
2479     int pic_size_in_ctb  = ((s->sps->width  >> s->sps->log2_min_cb_size) + 1) *
2480                            ((s->sps->height >> s->sps->log2_min_cb_size) + 1);
2481     int ret;
2482     AVFrame *cur_frame;
2483
2484     memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
2485     memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
2486     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2487     memset(s->is_pcm,        0, s->sps->min_pu_width * s->sps->min_pu_height);
2488     memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2489
2490     s->is_decoded        = 0;
2491     s->first_nal_type    = s->nal_unit_type;
2492
2493     if (s->pps->tiles_enabled_flag)
2494         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2495
2496     ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
2497                               s->poc);
2498     if (ret < 0)
2499         goto fail;
2500
2501     ret = ff_hevc_frame_rps(s);
2502     if (ret < 0) {
2503         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2504         goto fail;
2505     }
2506
2507     s->ref->frame->key_frame = IS_IRAP(s);
2508
2509     ret = set_side_data(s);
2510     if (ret < 0)
2511         goto fail;
2512
2513     cur_frame = s->sps->sao_enabled ? s->sao_frame : s->frame;
2514     cur_frame->pict_type = 3 - s->sh.slice_type;
2515
2516     av_frame_unref(s->output_frame);
2517     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2518     if (ret < 0)
2519         goto fail;
2520
2521     ff_thread_finish_setup(s->avctx);
2522
2523     return 0;
2524
2525 fail:
2526     if (s->ref && s->threads_type == FF_THREAD_FRAME)
2527         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2528     s->ref = NULL;
2529     return ret;
2530 }
2531
2532 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2533 {
2534     HEVCLocalContext *lc = s->HEVClc;
2535     GetBitContext *gb    = &lc->gb;
2536     int ctb_addr_ts, ret;
2537
2538     ret = init_get_bits8(gb, nal, length);
2539     if (ret < 0)
2540         return ret;
2541
2542     ret = hls_nal_unit(s);
2543     if (ret < 0) {
2544         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2545                s->nal_unit_type);
2546         goto fail;
2547     } else if (!ret)
2548         return 0;
2549
2550     switch (s->nal_unit_type) {
2551     case NAL_VPS:
2552         ret = ff_hevc_decode_nal_vps(s);
2553         if (ret < 0)
2554             goto fail;
2555         break;
2556     case NAL_SPS:
2557         ret = ff_hevc_decode_nal_sps(s);
2558         if (ret < 0)
2559             goto fail;
2560         break;
2561     case NAL_PPS:
2562         ret = ff_hevc_decode_nal_pps(s);
2563         if (ret < 0)
2564             goto fail;
2565         break;
2566     case NAL_SEI_PREFIX:
2567     case NAL_SEI_SUFFIX:
2568         ret = ff_hevc_decode_nal_sei(s);
2569         if (ret < 0)
2570             goto fail;
2571         break;
2572     case NAL_TRAIL_R:
2573     case NAL_TRAIL_N:
2574     case NAL_TSA_N:
2575     case NAL_TSA_R:
2576     case NAL_STSA_N:
2577     case NAL_STSA_R:
2578     case NAL_BLA_W_LP:
2579     case NAL_BLA_W_RADL:
2580     case NAL_BLA_N_LP:
2581     case NAL_IDR_W_RADL:
2582     case NAL_IDR_N_LP:
2583     case NAL_CRA_NUT:
2584     case NAL_RADL_N:
2585     case NAL_RADL_R:
2586     case NAL_RASL_N:
2587     case NAL_RASL_R:
2588         ret = hls_slice_header(s);
2589         if (ret < 0)
2590             return ret;
2591
2592         if (s->max_ra == INT_MAX) {
2593             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2594                 s->max_ra = s->poc;
2595             } else {
2596                 if (IS_IDR(s))
2597                     s->max_ra = INT_MIN;
2598             }
2599         }
2600
2601         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2602             s->poc <= s->max_ra) {
2603             s->is_decoded = 0;
2604             break;
2605         } else {
2606             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2607                 s->max_ra = INT_MIN;
2608         }
2609
2610         if (s->sh.first_slice_in_pic_flag) {
2611             ret = hevc_frame_start(s);
2612             if (ret < 0)
2613                 return ret;
2614         } else if (!s->ref) {
2615             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2616             goto fail;
2617         }
2618
2619         if (s->nal_unit_type != s->first_nal_type) {
2620             av_log(s->avctx, AV_LOG_ERROR,
2621                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2622                    s->first_nal_type, s->nal_unit_type);
2623             return AVERROR_INVALIDDATA;
2624         }
2625
2626         if (!s->sh.dependent_slice_segment_flag &&
2627             s->sh.slice_type != I_SLICE) {
2628             ret = ff_hevc_slice_rpl(s);
2629             if (ret < 0) {
2630                 av_log(s->avctx, AV_LOG_WARNING,
2631                        "Error constructing the reference lists for the current slice.\n");
2632                 goto fail;
2633             }
2634         }
2635
2636         if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2637             ctb_addr_ts = hls_slice_data_wpp(s, nal, length);
2638         else
2639             ctb_addr_ts = hls_slice_data(s);
2640         if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2641             s->is_decoded = 1;
2642         }
2643
2644         if (ctb_addr_ts < 0) {
2645             ret = ctb_addr_ts;
2646             goto fail;
2647         }
2648         break;
2649     case NAL_EOS_NUT:
2650     case NAL_EOB_NUT:
2651         s->seq_decode = (s->seq_decode + 1) & 0xff;
2652         s->max_ra     = INT_MAX;
2653         break;
2654     case NAL_AUD:
2655     case NAL_FD_NUT:
2656         break;
2657     default:
2658         av_log(s->avctx, AV_LOG_INFO,
2659                "Skipping NAL unit %d\n", s->nal_unit_type);
2660     }
2661
2662     return 0;
2663 fail:
2664     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2665         return ret;
2666     return 0;
2667 }
2668
2669 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2670  * between these functions would be nice. */
2671 int ff_hevc_extract_rbsp(HEVCContext *s, const uint8_t *src, int length,
2672                          HEVCNAL *nal)
2673 {
2674     int i, si, di;
2675     uint8_t *dst;
2676
2677     s->skipped_bytes = 0;
2678 #define STARTCODE_TEST                                                  \
2679         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
2680             if (src[i + 2] != 3) {                                      \
2681                 /* startcode, so we must be past the end */             \
2682                 length = i;                                             \
2683             }                                                           \
2684             break;                                                      \
2685         }
2686 #if HAVE_FAST_UNALIGNED
2687 #define FIND_FIRST_ZERO                                                 \
2688         if (i > 0 && !src[i])                                           \
2689             i--;                                                        \
2690         while (src[i])                                                  \
2691             i++
2692 #if HAVE_FAST_64BIT
2693     for (i = 0; i + 1 < length; i += 9) {
2694         if (!((~AV_RN64A(src + i) &
2695                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2696               0x8000800080008080ULL))
2697             continue;
2698         FIND_FIRST_ZERO;
2699         STARTCODE_TEST;
2700         i -= 7;
2701     }
2702 #else
2703     for (i = 0; i + 1 < length; i += 5) {
2704         if (!((~AV_RN32A(src + i) &
2705                (AV_RN32A(src + i) - 0x01000101U)) &
2706               0x80008080U))
2707             continue;
2708         FIND_FIRST_ZERO;
2709         STARTCODE_TEST;
2710         i -= 3;
2711     }
2712 #endif /* HAVE_FAST_64BIT */
2713 #else
2714     for (i = 0; i + 1 < length; i += 2) {
2715         if (src[i])
2716             continue;
2717         if (i > 0 && src[i - 1] == 0)
2718             i--;
2719         STARTCODE_TEST;
2720     }
2721 #endif /* HAVE_FAST_UNALIGNED */
2722
2723     if (i >= length - 1) { // no escaped 0
2724         nal->data = src;
2725         nal->size = length;
2726         return length;
2727     }
2728
2729     av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2730                    length + FF_INPUT_BUFFER_PADDING_SIZE);
2731     if (!nal->rbsp_buffer)
2732         return AVERROR(ENOMEM);
2733
2734     dst = nal->rbsp_buffer;
2735
2736     memcpy(dst, src, i);
2737     si = di = i;
2738     while (si + 2 < length) {
2739         // remove escapes (very rare 1:2^22)
2740         if (src[si + 2] > 3) {
2741             dst[di++] = src[si++];
2742             dst[di++] = src[si++];
2743         } else if (src[si] == 0 && src[si + 1] == 0) {
2744             if (src[si + 2] == 3) { // escape
2745                 dst[di++] = 0;
2746                 dst[di++] = 0;
2747                 si       += 3;
2748
2749                 s->skipped_bytes++;
2750                 if (s->skipped_bytes_pos_size < s->skipped_bytes) {
2751                     s->skipped_bytes_pos_size *= 2;
2752                     av_reallocp_array(&s->skipped_bytes_pos,
2753                             s->skipped_bytes_pos_size,
2754                             sizeof(*s->skipped_bytes_pos));
2755                     if (!s->skipped_bytes_pos)
2756                         return AVERROR(ENOMEM);
2757                 }
2758                 if (s->skipped_bytes_pos)
2759                     s->skipped_bytes_pos[s->skipped_bytes-1] = di - 1;
2760                 continue;
2761             } else // next start code
2762                 goto nsc;
2763         }
2764
2765         dst[di++] = src[si++];
2766     }
2767     while (si < length)
2768         dst[di++] = src[si++];
2769
2770 nsc:
2771     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2772
2773     nal->data = dst;
2774     nal->size = di;
2775     return si;
2776 }
2777
2778 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2779 {
2780     int i, consumed, ret = 0;
2781
2782     s->ref = NULL;
2783     s->last_eos = s->eos;
2784     s->eos = 0;
2785
2786     /* split the input packet into NAL units, so we know the upper bound on the
2787      * number of slices in the frame */
2788     s->nb_nals = 0;
2789     while (length >= 4) {
2790         HEVCNAL *nal;
2791         int extract_length = 0;
2792
2793         if (s->is_nalff) {
2794             int i;
2795             for (i = 0; i < s->nal_length_size; i++)
2796                 extract_length = (extract_length << 8) | buf[i];
2797             buf    += s->nal_length_size;
2798             length -= s->nal_length_size;
2799
2800             if (extract_length > length) {
2801                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2802                 ret = AVERROR_INVALIDDATA;
2803                 goto fail;
2804             }
2805         } else {
2806             /* search start code */
2807             while (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2808                 ++buf;
2809                 --length;
2810                 if (length < 4) {
2811                     av_log(s->avctx, AV_LOG_ERROR, "No start code is found.\n");
2812                     ret = AVERROR_INVALIDDATA;
2813                     goto fail;
2814                 }
2815             }
2816
2817             buf           += 3;
2818             length        -= 3;
2819         }
2820
2821         if (!s->is_nalff)
2822             extract_length = length;
2823
2824         if (s->nals_allocated < s->nb_nals + 1) {
2825             int new_size = s->nals_allocated + 1;
2826             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2827             if (!tmp) {
2828                 ret = AVERROR(ENOMEM);
2829                 goto fail;
2830             }
2831             s->nals = tmp;
2832             memset(s->nals + s->nals_allocated, 0,
2833                    (new_size - s->nals_allocated) * sizeof(*tmp));
2834             av_reallocp_array(&s->skipped_bytes_nal, new_size, sizeof(*s->skipped_bytes_nal));
2835             av_reallocp_array(&s->skipped_bytes_pos_size_nal, new_size, sizeof(*s->skipped_bytes_pos_size_nal));
2836             av_reallocp_array(&s->skipped_bytes_pos_nal, new_size, sizeof(*s->skipped_bytes_pos_nal));
2837             s->skipped_bytes_pos_size_nal[s->nals_allocated] = 1024; // initial buffer size
2838             s->skipped_bytes_pos_nal[s->nals_allocated] = av_malloc_array(s->skipped_bytes_pos_size_nal[s->nals_allocated], sizeof(*s->skipped_bytes_pos));
2839             s->nals_allocated = new_size;
2840         }
2841         s->skipped_bytes_pos_size = s->skipped_bytes_pos_size_nal[s->nb_nals];
2842         s->skipped_bytes_pos = s->skipped_bytes_pos_nal[s->nb_nals];
2843         nal = &s->nals[s->nb_nals];
2844
2845         consumed = ff_hevc_extract_rbsp(s, buf, extract_length, nal);
2846
2847         s->skipped_bytes_nal[s->nb_nals] = s->skipped_bytes;
2848         s->skipped_bytes_pos_size_nal[s->nb_nals] = s->skipped_bytes_pos_size;
2849         s->skipped_bytes_pos_nal[s->nb_nals++] = s->skipped_bytes_pos;
2850
2851
2852         if (consumed < 0) {
2853             ret = consumed;
2854             goto fail;
2855         }
2856
2857         ret = init_get_bits8(&s->HEVClc->gb, nal->data, nal->size);
2858         if (ret < 0)
2859             goto fail;
2860         hls_nal_unit(s);
2861
2862         if (s->nal_unit_type == NAL_EOB_NUT ||
2863             s->nal_unit_type == NAL_EOS_NUT)
2864             s->eos = 1;
2865
2866         buf    += consumed;
2867         length -= consumed;
2868     }
2869
2870     /* parse the NAL units */
2871     for (i = 0; i < s->nb_nals; i++) {
2872         int ret;
2873         s->skipped_bytes = s->skipped_bytes_nal[i];
2874         s->skipped_bytes_pos = s->skipped_bytes_pos_nal[i];
2875
2876         ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2877         if (ret < 0) {
2878             av_log(s->avctx, AV_LOG_WARNING,
2879                    "Error parsing NAL unit #%d.\n", i);
2880             goto fail;
2881         }
2882     }
2883
2884 fail:
2885     if (s->ref && s->threads_type == FF_THREAD_FRAME)
2886         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2887
2888     return ret;
2889 }
2890
/**
 * Log a 16-byte MD5 digest as 32 lowercase hexadecimal digits.
 *
 * @param log_ctx context passed through to av_log()
 * @param level   log level passed through to av_log()
 * @param md5     digest to print
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *cur       = md5;
    const uint8_t *const end = md5 + 16;

    while (cur < end)
        av_log(log_ctx, level, "%02"PRIx8, *cur++);
}
2897
2898 static int verify_md5(HEVCContext *s, AVFrame *frame)
2899 {
2900     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2901     int pixel_shift;
2902     int i, j;
2903
2904     if (!desc)
2905         return AVERROR(EINVAL);
2906
2907     pixel_shift = desc->comp[0].depth_minus1 > 7;
2908
2909     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2910            s->poc);
2911
2912     /* the checksums are LE, so we have to byteswap for >8bpp formats
2913      * on BE arches */
2914 #if HAVE_BIGENDIAN
2915     if (pixel_shift && !s->checksum_buf) {
2916         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2917                        FFMAX3(frame->linesize[0], frame->linesize[1],
2918                               frame->linesize[2]));
2919         if (!s->checksum_buf)
2920             return AVERROR(ENOMEM);
2921     }
2922 #endif
2923
2924     for (i = 0; frame->data[i]; i++) {
2925         int width  = s->avctx->coded_width;
2926         int height = s->avctx->coded_height;
2927         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2928         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2929         uint8_t md5[16];
2930
2931         av_md5_init(s->md5_ctx);
2932         for (j = 0; j < h; j++) {
2933             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2934 #if HAVE_BIGENDIAN
2935             if (pixel_shift) {
2936                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2937                                     (const uint16_t *) src, w);
2938                 src = s->checksum_buf;
2939             }
2940 #endif
2941             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2942         }
2943         av_md5_final(s->md5_ctx, md5);
2944
2945         if (!memcmp(md5, s->md5[i], 16)) {
2946             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2947             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2948             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2949         } else {
2950             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2951             print_md5(s->avctx, AV_LOG_ERROR, md5);
2952             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2953             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2954             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2955             return AVERROR_INVALIDDATA;
2956         }
2957     }
2958
2959     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2960
2961     return 0;
2962 }
2963
2964 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2965                              AVPacket *avpkt)
2966 {
2967     int ret;
2968     HEVCContext *s = avctx->priv_data;
2969
2970     if (!avpkt->size) {
2971         ret = ff_hevc_output_frame(s, data, 1);
2972         if (ret < 0)
2973             return ret;
2974
2975         *got_output = ret;
2976         return 0;
2977     }
2978
2979     s->ref = NULL;
2980     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2981     if (ret < 0)
2982         return ret;
2983
2984     /* verify the SEI checksum */
2985     if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2986         s->is_md5) {
2987         ret = verify_md5(s, s->ref->frame);
2988         if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2989             ff_hevc_unref_frame(s, s->ref, ~0);
2990             return ret;
2991         }
2992     }
2993     s->is_md5 = 0;
2994
2995     if (s->is_decoded) {
2996         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2997         s->is_decoded = 0;
2998     }
2999
3000     if (s->output_frame->buf[0]) {
3001         av_frame_move_ref(data, s->output_frame);
3002         *got_output = 1;
3003     }
3004
3005     return avpkt->size;
3006 }
3007
3008 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
3009 {
3010     int ret;
3011
3012     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
3013     if (ret < 0)
3014         return ret;
3015
3016     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
3017     if (!dst->tab_mvf_buf)
3018         goto fail;
3019     dst->tab_mvf = src->tab_mvf;
3020
3021     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
3022     if (!dst->rpl_tab_buf)
3023         goto fail;
3024     dst->rpl_tab = src->rpl_tab;
3025
3026     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
3027     if (!dst->rpl_buf)
3028         goto fail;
3029
3030     dst->poc        = src->poc;
3031     dst->ctb_count  = src->ctb_count;
3032     dst->window     = src->window;
3033     dst->flags      = src->flags;
3034     dst->sequence   = src->sequence;
3035
3036     return 0;
3037 fail:
3038     ff_hevc_unref_frame(s, dst, ~0);
3039     return AVERROR(ENOMEM);
3040 }
3041
3042 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3043 {
3044     HEVCContext       *s = avctx->priv_data;
3045     HEVCLocalContext *lc = s->HEVClc;
3046     int i;
3047
3048     pic_arrays_free(s);
3049
3050     av_freep(&s->md5_ctx);
3051
3052     for(i=0; i < s->nals_allocated; i++) {
3053         av_freep(&s->skipped_bytes_pos_nal[i]);
3054     }
3055     av_freep(&s->skipped_bytes_pos_size_nal);
3056     av_freep(&s->skipped_bytes_nal);
3057     av_freep(&s->skipped_bytes_pos_nal);
3058
3059     av_freep(&s->cabac_state);
3060
3061     av_frame_free(&s->tmp_frame);
3062     av_frame_free(&s->output_frame);
3063
3064     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3065         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3066         av_frame_free(&s->DPB[i].frame);
3067     }
3068
3069     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
3070         av_buffer_unref(&s->vps_list[i]);
3071     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
3072         av_buffer_unref(&s->sps_list[i]);
3073     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
3074         av_buffer_unref(&s->pps_list[i]);
3075     s->sps = NULL;
3076     s->pps = NULL;
3077     s->vps = NULL;
3078
3079     av_buffer_unref(&s->current_sps);
3080
3081     av_freep(&s->sh.entry_point_offset);
3082     av_freep(&s->sh.offset);
3083     av_freep(&s->sh.size);
3084
3085     for (i = 1; i < s->threads_number; i++) {
3086         lc = s->HEVClcList[i];
3087         if (lc) {
3088             av_freep(&s->HEVClcList[i]);
3089             av_freep(&s->sList[i]);
3090         }
3091     }
3092     if (s->HEVClc == s->HEVClcList[0])
3093         s->HEVClc = NULL;
3094     av_freep(&s->HEVClcList[0]);
3095
3096     for (i = 0; i < s->nals_allocated; i++)
3097         av_freep(&s->nals[i].rbsp_buffer);
3098     av_freep(&s->nals);
3099     s->nals_allocated = 0;
3100
3101     return 0;
3102 }
3103
3104 static av_cold int hevc_init_context(AVCodecContext *avctx)
3105 {
3106     HEVCContext *s = avctx->priv_data;
3107     int i;
3108
3109     s->avctx = avctx;
3110
3111     s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3112     if (!s->HEVClc)
3113         goto fail;
3114     s->HEVClcList[0] = s->HEVClc;
3115     s->sList[0] = s;
3116
3117     s->cabac_state = av_malloc(HEVC_CONTEXTS);
3118     if (!s->cabac_state)
3119         goto fail;
3120
3121     s->tmp_frame = av_frame_alloc();
3122     if (!s->tmp_frame)
3123         goto fail;
3124
3125     s->output_frame = av_frame_alloc();
3126     if (!s->output_frame)
3127         goto fail;
3128
3129     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3130         s->DPB[i].frame = av_frame_alloc();
3131         if (!s->DPB[i].frame)
3132             goto fail;
3133         s->DPB[i].tf.f = s->DPB[i].frame;
3134     }
3135
3136     s->max_ra = INT_MAX;
3137
3138     s->md5_ctx = av_md5_alloc();
3139     if (!s->md5_ctx)
3140         goto fail;
3141
3142     ff_bswapdsp_init(&s->bdsp);
3143
3144     s->context_initialized = 1;
3145     s->eos = 0;
3146
3147     return 0;
3148
3149 fail:
3150     hevc_decode_free(avctx);
3151     return AVERROR(ENOMEM);
3152 }
3153
3154 static int hevc_update_thread_context(AVCodecContext *dst,
3155                                       const AVCodecContext *src)
3156 {
3157     HEVCContext *s  = dst->priv_data;
3158     HEVCContext *s0 = src->priv_data;
3159     int i, ret;
3160
3161     if (!s->context_initialized) {
3162         ret = hevc_init_context(dst);
3163         if (ret < 0)
3164             return ret;
3165     }
3166
3167     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3168         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3169         if (s0->DPB[i].frame->buf[0]) {
3170             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3171             if (ret < 0)
3172                 return ret;
3173         }
3174     }
3175
3176     if (s->sps != s0->sps)
3177         s->sps = NULL;
3178     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3179         av_buffer_unref(&s->vps_list[i]);
3180         if (s0->vps_list[i]) {
3181             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3182             if (!s->vps_list[i])
3183                 return AVERROR(ENOMEM);
3184         }
3185     }
3186
3187     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3188         av_buffer_unref(&s->sps_list[i]);
3189         if (s0->sps_list[i]) {
3190             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3191             if (!s->sps_list[i])
3192                 return AVERROR(ENOMEM);
3193         }
3194     }
3195
3196     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3197         av_buffer_unref(&s->pps_list[i]);
3198         if (s0->pps_list[i]) {
3199             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3200             if (!s->pps_list[i])
3201                 return AVERROR(ENOMEM);
3202         }
3203     }
3204
3205     if (s->current_sps && s->sps == (HEVCSPS*)s->current_sps->data)
3206         s->sps = NULL;
3207     av_buffer_unref(&s->current_sps);
3208
3209     if (s->sps != s0->sps)
3210         ret = set_sps(s, s0->sps);
3211
3212     s->seq_decode = s0->seq_decode;
3213     s->seq_output = s0->seq_output;
3214     s->pocTid0    = s0->pocTid0;
3215     s->max_ra     = s0->max_ra;
3216     s->eos        = s0->eos;
3217
3218     s->is_nalff        = s0->is_nalff;
3219     s->nal_length_size = s0->nal_length_size;
3220
3221     s->threads_number      = s0->threads_number;
3222     s->threads_type        = s0->threads_type;
3223
3224     if (s0->eos) {
3225         s->seq_decode = (s->seq_decode + 1) & 0xff;
3226         s->max_ra = INT_MAX;
3227     }
3228
3229     return 0;
3230 }
3231
3232 static int hevc_decode_extradata(HEVCContext *s)
3233 {
3234     AVCodecContext *avctx = s->avctx;
3235     GetByteContext gb;
3236     int ret;
3237
3238     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3239
3240     if (avctx->extradata_size > 3 &&
3241         (avctx->extradata[0] || avctx->extradata[1] ||
3242          avctx->extradata[2] > 1)) {
3243         /* It seems the extradata is encoded as hvcC format.
3244          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3245          * is finalized. When finalized, configurationVersion will be 1 and we
3246          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3247         int i, j, num_arrays, nal_len_size;
3248
3249         s->is_nalff = 1;
3250
3251         bytestream2_skip(&gb, 21);
3252         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3253         num_arrays   = bytestream2_get_byte(&gb);
3254
3255         /* nal units in the hvcC always have length coded with 2 bytes,
3256          * so put a fake nal_length_size = 2 while parsing them */
3257         s->nal_length_size = 2;
3258
3259         /* Decode nal units from hvcC. */
3260         for (i = 0; i < num_arrays; i++) {
3261             int type = bytestream2_get_byte(&gb) & 0x3f;
3262             int cnt  = bytestream2_get_be16(&gb);
3263
3264             for (j = 0; j < cnt; j++) {
3265                 // +2 for the nal size field
3266                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3267                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3268                     av_log(s->avctx, AV_LOG_ERROR,
3269                            "Invalid NAL unit size in extradata.\n");
3270                     return AVERROR_INVALIDDATA;
3271                 }
3272
3273                 ret = decode_nal_units(s, gb.buffer, nalsize);
3274                 if (ret < 0) {
3275                     av_log(avctx, AV_LOG_ERROR,
3276                            "Decoding nal unit %d %d from hvcC failed\n",
3277                            type, i);
3278                     return ret;
3279                 }
3280                 bytestream2_skip(&gb, nalsize);
3281             }
3282         }
3283
3284         /* Now store right nal length size, that will be used to parse
3285          * all other nals */
3286         s->nal_length_size = nal_len_size;
3287     } else {
3288         s->is_nalff = 0;
3289         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3290         if (ret < 0)
3291             return ret;
3292     }
3293     return 0;
3294 }
3295
3296 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3297 {
3298     HEVCContext *s = avctx->priv_data;
3299     int ret;
3300
3301     ff_init_cabac_states();
3302
3303     avctx->internal->allocate_progress = 1;
3304
3305     ret = hevc_init_context(avctx);
3306     if (ret < 0)
3307         return ret;
3308
3309     s->enable_parallel_tiles = 0;
3310     s->picture_struct = 0;
3311
3312     if(avctx->active_thread_type & FF_THREAD_SLICE)
3313         s->threads_number = avctx->thread_count;
3314     else
3315         s->threads_number = 1;
3316
3317     if (avctx->extradata_size > 0 && avctx->extradata) {
3318         ret = hevc_decode_extradata(s);
3319         if (ret < 0) {
3320             hevc_decode_free(avctx);
3321             return ret;
3322         }
3323     }
3324
3325     if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3326             s->threads_type = FF_THREAD_FRAME;
3327         else
3328             s->threads_type = FF_THREAD_SLICE;
3329
3330     return 0;
3331 }
3332
3333 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3334 {
3335     HEVCContext *s = avctx->priv_data;
3336     int ret;
3337
3338     memset(s, 0, sizeof(*s));
3339
3340     ret = hevc_init_context(avctx);
3341     if (ret < 0)
3342         return ret;
3343
3344     return 0;
3345 }
3346
3347 static void hevc_decode_flush(AVCodecContext *avctx)
3348 {
3349     HEVCContext *s = avctx->priv_data;
3350     ff_hevc_flush_dpb(s);
3351     s->max_ra = INT_MAX;
3352 }
3353
/* Byte offset of a HEVCContext field, for use in the AVOption table. */
#define OFFSET(x) offsetof(HEVCContext, x)
/* Flags shared by every option below: video decoding parameters. */
#define PAR (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM)
3356
3357 static const AVProfile profiles[] = {
3358     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3359     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3360     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3361     { FF_PROFILE_HEVC_REXT,                 "Rext"  },
3362     { FF_PROFILE_UNKNOWN },
3363 };
3364
3365 static const AVOption options[] = {
3366     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3367         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3368     { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3369         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3370     { NULL },
3371 };
3372
3373 static const AVClass hevc_decoder_class = {
3374     .class_name = "HEVC decoder",
3375     .item_name  = av_default_item_name,
3376     .option     = options,
3377     .version    = LIBAVUTIL_VERSION_INT,
3378 };
3379
3380 AVCodec ff_hevc_decoder = {
3381     .name                  = "hevc",
3382     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3383     .type                  = AVMEDIA_TYPE_VIDEO,
3384     .id                    = AV_CODEC_ID_HEVC,
3385     .priv_data_size        = sizeof(HEVCContext),
3386     .priv_class            = &hevc_decoder_class,
3387     .init                  = hevc_decode_init,
3388     .close                 = hevc_decode_free,
3389     .decode                = hevc_decode_frame,
3390     .flush                 = hevc_decode_flush,
3391     .update_thread_context = hevc_update_thread_context,
3392     .init_thread_copy      = hevc_init_thread_copy,
3393     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3394                              CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
3395     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
3396 };