4 * Copyright (C) 2012 - 2013 Guillaume Martres
5 * Copyright (C) 2012 - 2013 Mickael Raulet
6 * Copyright (C) 2012 - 2013 Gildas Cocherel
7 * Copyright (C) 2012 - 2013 Wassim Hamidouche
9 * This file is part of FFmpeg.
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/atomic.h"
27 #include "libavutil/attributes.h"
28 #include "libavutil/common.h"
29 #include "libavutil/display.h"
30 #include "libavutil/internal.h"
31 #include "libavutil/md5.h"
32 #include "libavutil/opt.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/stereo3d.h"
37 #include "bytestream.h"
38 #include "cabac_functions.h"
/* Map from a prediction-unit width in luma samples to the index used to
 * select the matching entry in the weighted-prediction DSP function tables.
 * Only the listed widths are valid; all other slots stay zero-initialized. */
const uint8_t ff_hevc_pel_weight[65] = {
    [2]  = 0, [4]  = 1, [6]  = 2, [8]  = 3,
    [12] = 4, [16] = 5, [24] = 6, [32] = 7,
    [48] = 8, [64] = 9
};
 * NOTE: Each function hls_foo corresponds to the function foo in the
 * specification (HLS stands for High Level Syntax).
/* free everything allocated by pic_arrays_init() */
/* NOTE(review): this extract appears to have lines elided (e.g. the opening
 * brace and, presumably, av_freep(&s->sao)); only the visible statements are
 * documented below. */
static void pic_arrays_free(HEVCContext *s)
    /* per-CTB deblocking parameters */
    av_freep(&s->deblock);
    /* per-CU skip flags and coding-tree depth */
    av_freep(&s->skip_flag);
    av_freep(&s->tab_ct_depth);
    /* intra prediction modes, luma CBF, per-CU QP */
    av_freep(&s->tab_ipm);
    av_freep(&s->cbf_luma);
    av_freep(&s->qp_y_tab);
    /* per-CTB slice address and slice-edge filter flags */
    av_freep(&s->tab_slice_address);
    av_freep(&s->filter_slice_edges);
    /* deblocking boundary-strength maps */
    av_freep(&s->horizontal_bs);
    av_freep(&s->vertical_bs);
    /* slice-header entry point bookkeeping (WPP/tiles) */
    av_freep(&s->sh.entry_point_offset);
    av_freep(&s->sh.size);
    av_freep(&s->sh.offset);
    /* per-frame pools; uninit also frees pooled buffers */
    av_buffer_pool_uninit(&s->tab_mvf_pool);
    av_buffer_pool_uninit(&s->rpl_tab_pool);
/* allocate arrays that depend on frame dimensions */
/* Returns 0 on success, AVERROR(ENOMEM) on any allocation failure.
 * NOTE(review): the success return and the goto/fail cleanup lines appear to
 * be elided from this extract; on failure the companion pic_arrays_free()
 * releases whatever was allocated. */
static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
    int log2_min_cb_size = sps->log2_min_cb_size;
    int width            = sps->width;
    int height           = sps->height;
    /* +1 in each dimension to cover a partial trailing CB row/column */
    int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
                           ((height >> log2_min_cb_size) + 1);
    int ctb_count        = sps->ctb_width * sps->ctb_height;
    int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
    /* boundary-strength grid is on a 4-sample raster */
    s->bs_width  = (width  >> 2) + 1;
    s->bs_height = (height >> 2) + 1;
    s->sao     = av_mallocz_array(ctb_count, sizeof(*s->sao));
    s->deblock = av_mallocz_array(ctb_count, sizeof(*s->deblock));
    if (!s->sao || !s->deblock)
    s->skip_flag    = av_malloc(pic_size_in_ctb);
    s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
    if (!s->skip_flag || !s->tab_ct_depth)
    s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
    s->tab_ipm  = av_mallocz(min_pu_size);
    s->is_pcm   = av_malloc((sps->min_pu_width + 1) * (sps->min_pu_height + 1));
    if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
    s->filter_slice_edges = av_malloc(ctb_count);
    s->tab_slice_address  = av_malloc_array(pic_size_in_ctb,
                                            sizeof(*s->tab_slice_address));
    s->qp_y_tab           = av_malloc_array(pic_size_in_ctb,
                                            sizeof(*s->qp_y_tab));
    if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
    s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
    s->vertical_bs   = av_mallocz_array(s->bs_width, s->bs_height);
    if (!s->horizontal_bs || !s->vertical_bs)
    /* pooled per-frame buffers: motion vectors and ref-pic-list tabs */
    s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
    s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
    if (!s->tab_mvf_pool || !s->rpl_tab_pool)
    return AVERROR(ENOMEM);
/* Parse the pred_weight_table() syntax element of the slice header
 * (weighted-prediction weights/offsets for both reference lists).
 * NOTE(review): braces/else lines are elided in this extract; the visible
 * statements follow the HEVC spec 7.3.6.3 parse order. */
static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
    uint8_t luma_weight_l0_flag[16];
    uint8_t chroma_weight_l0_flag[16];
    uint8_t luma_weight_l1_flag[16];
    uint8_t chroma_weight_l1_flag[16];
    s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
    /* chroma denom is coded as a delta against the luma denom */
    if (s->sps->chroma_format_idc != 0) {
        int delta = get_se_golomb(gb);
        s->sh.chroma_log2_weight_denom = av_clip(s->sh.luma_log2_weight_denom + delta, 0, 7);
    /* list 0: per-ref luma flags; absent weights default to 1<<denom, offset 0 */
    for (i = 0; i < s->sh.nb_refs[L0]; i++) {
        luma_weight_l0_flag[i] = get_bits1(gb);
        if (!luma_weight_l0_flag[i]) {
            s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
            s->sh.luma_offset_l0[i] = 0;
    if (s->sps->chroma_format_idc != 0) {
        for (i = 0; i < s->sh.nb_refs[L0]; i++)
            chroma_weight_l0_flag[i] = get_bits1(gb);
        /* monochrome: no chroma weights signalled */
        for (i = 0; i < s->sh.nb_refs[L0]; i++)
            chroma_weight_l0_flag[i] = 0;
    for (i = 0; i < s->sh.nb_refs[L0]; i++) {
        if (luma_weight_l0_flag[i]) {
            int delta_luma_weight_l0 = get_se_golomb(gb);
            s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
            s->sh.luma_offset_l0[i] = get_se_golomb(gb);
        if (chroma_weight_l0_flag[i]) {
            for (j = 0; j < 2; j++) {   /* j: 0 = Cb, 1 = Cr */
                int delta_chroma_weight_l0 = get_se_golomb(gb);
                int delta_chroma_offset_l0 = get_se_golomb(gb);
                s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
                /* offset reconstruction per spec eq. 7-56, clipped to 8-bit range */
                s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
                                                                                 >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
            s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
            s->sh.chroma_offset_l0[i][0] = 0;
            s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
            s->sh.chroma_offset_l0[i][1] = 0;
    /* list 1: mirrors the list-0 parsing, only present for B slices */
    if (s->sh.slice_type == B_SLICE) {
        for (i = 0; i < s->sh.nb_refs[L1]; i++) {
            luma_weight_l1_flag[i] = get_bits1(gb);
            if (!luma_weight_l1_flag[i]) {
                s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
                s->sh.luma_offset_l1[i] = 0;
        if (s->sps->chroma_format_idc != 0) {
            for (i = 0; i < s->sh.nb_refs[L1]; i++)
                chroma_weight_l1_flag[i] = get_bits1(gb);
            for (i = 0; i < s->sh.nb_refs[L1]; i++)
                chroma_weight_l1_flag[i] = 0;
        for (i = 0; i < s->sh.nb_refs[L1]; i++) {
            if (luma_weight_l1_flag[i]) {
                int delta_luma_weight_l1 = get_se_golomb(gb);
                s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
                s->sh.luma_offset_l1[i] = get_se_golomb(gb);
            if (chroma_weight_l1_flag[i]) {
                for (j = 0; j < 2; j++) {
                    int delta_chroma_weight_l1 = get_se_golomb(gb);
                    int delta_chroma_offset_l1 = get_se_golomb(gb);
                    s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
                    s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
                s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
                s->sh.chroma_offset_l1[i][0] = 0;
                s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
                s->sh.chroma_offset_l1[i][1] = 0;
/* Parse the long-term reference picture set from the slice header.
 * Fills rps->poc[]/rps->used[] from SPS-signalled LT pictures (first nb_sps
 * entries) followed by slice-header-signalled ones.
 * Returns AVERROR_INVALIDDATA if the combined count overflows rps->poc[].
 * NOTE(review): several control-flow lines (early return, if/else around the
 * SPS-vs-slice entry source) are elided in this extract. */
static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
    const HEVCSPS *sps = s->sps;
    int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
    int prev_delta_msb = 0;
    unsigned int nb_sps = 0, nb_sh;
    if (!sps->long_term_ref_pics_present_flag)
    if (sps->num_long_term_ref_pics_sps > 0)
        nb_sps = get_ue_golomb_long(gb);
    nb_sh = get_ue_golomb_long(gb);
    /* 64-bit add so a huge coded nb_sh cannot wrap the bound check */
    if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
        return AVERROR_INVALIDDATA;
    rps->nb_refs = nb_sh + nb_sps;
    for (i = 0; i < rps->nb_refs; i++) {
        uint8_t delta_poc_msb_present;
        uint8_t lt_idx_sps = 0;
        /* entry taken from the SPS LT-RPS table ... */
        if (sps->num_long_term_ref_pics_sps > 1)
            lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
        rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
        rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
        /* ... or coded explicitly in the slice header */
        rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
        rps->used[i] = get_bits1(gb);
        delta_poc_msb_present = get_bits1(gb);
        if (delta_poc_msb_present) {
            int delta = get_ue_golomb_long(gb);
            /* deltas are cumulative within each of the two entry groups */
            if (i && i != nb_sps)
                delta += prev_delta_msb;
            /* extend the LSB-only poc with the signalled MSB cycle */
            rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
            prev_delta_msb = delta;
/* Allocate the temporary frame used by the SAO filter, 2 pixels larger than
 * the display size in each dimension, then shift each plane pointer by one
 * row and one pixel so a 1-pixel border is addressable on all sides.
 * NOTE(review): the error return and closing lines are elided here. */
static int get_buffer_sao(HEVCContext *s, AVFrame *frame, HEVCSPS *sps)
    frame->width  = s->avctx->width  + 2;
    frame->height = s->avctx->height + 2;
    if ((ret = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
    for (i = 0; frame->data[i]; i++) {
        /* one row down, one sample right (pixel_shift accounts for >8-bit) */
        int offset = frame->linesize[i] + (1 << sps->pixel_shift);
        frame->data[i] += offset;
    /* restore the nominal dimensions after the padded allocation */
    frame->width  = s->avctx->width;
    frame->height = s->avctx->height;
/* Activate a new SPS: (re)allocate frame-size-dependent arrays, export the
 * stream properties (dimensions, pix_fmt, colorimetry, SAR, time base) to the
 * AVCodecContext and re-init the DSP contexts for the new bit depth.
 * NOTE(review): the trailing av_reduce() call and the function's return/error
 * paths are truncated in this extract. */
static int set_sps(HEVCContext *s, const HEVCSPS *sps)
    unsigned int num = 0, den = 0;
    ret = pic_arrays_init(s, sps);
    s->avctx->coded_width         = sps->width;
    s->avctx->coded_height        = sps->height;
    s->avctx->width               = sps->output_width;
    s->avctx->height              = sps->output_height;
    s->avctx->pix_fmt             = sps->pix_fmt;
    s->avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
    ff_set_sar(s->avctx, sps->vui.sar);
    if (sps->vui.video_signal_type_present_flag)
        s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
        /* default when VUI does not signal the range */
        s->avctx->color_range = AVCOL_RANGE_MPEG;
    if (sps->vui.colour_description_present_flag) {
        s->avctx->color_primaries = sps->vui.colour_primaries;
        s->avctx->color_trc       = sps->vui.transfer_characteristic;
        s->avctx->colorspace      = sps->vui.matrix_coeffs;
        s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
        s->avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
        s->avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
    /* re-init prediction/DSP function tables for the SPS bit depth */
    ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
    ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
    ff_videodsp_init (&s->vdsp,    sps->bit_depth);
    if (sps->sao_enabled) {
        av_frame_unref(s->tmp_frame);
        ret = get_buffer_sao(s, s->tmp_frame, sps);
        s->sao_frame = s->tmp_frame;
    s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
    /* VPS timing info takes precedence over VUI timing info */
    if (s->vps->vps_timing_info_present_flag) {
        num = s->vps->vps_num_units_in_tick;
        den = s->vps->vps_time_scale;
    } else if (sps->vui.vui_timing_info_present_flag) {
        num = sps->vui.vui_num_units_in_tick;
        den = sps->vui.vui_time_scale;
    if (num != 0 && den != 0)
        av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
/* Return non-zero if last_sps is still one of the SPSes stored in
 * s->sps_list[] (i.e. the old SPS object remains valid for comparison).
 * NOTE(review): the return statements and list-entry NULL checks are elided
 * in this extract. */
static int is_sps_exist(HEVCContext *s, const HEVCSPS* last_sps)
    for( i = 0; i < MAX_SPS_COUNT; i++)
        if (last_sps == (HEVCSPS*)s->sps_list[i]->data)
/* Parse a slice segment header (HEVC spec 7.3.6.1) into s->sh and activate
 * the referenced PPS/SPS.
 * Returns 0 on success, AVERROR_INVALIDDATA on syntax/range errors, or
 * AVERROR(ENOMEM). The bit order below must exactly follow the spec.
 * NOTE(review): many structural lines (braces, else branches, some
 * conditions such as the IS_IRAP / IDR guards) are elided in this extract;
 * comments describe only the visible statements. */
static int hls_slice_header(HEVCContext *s)
    GetBitContext *gb = &s->HEVClc->gb;
    SliceHeader *sh   = &s->sh;
    // Coded parameters
    sh->first_slice_in_pic_flag = get_bits1(gb);
    if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
        /* new coded video sequence: bump the sequence counter and drop refs */
        s->seq_decode = (s->seq_decode + 1) & 0xff;
        ff_hevc_clear_refs(s);
    sh->no_output_of_prior_pics_flag = 0;
    sh->no_output_of_prior_pics_flag = get_bits1(gb);
    /* a CRA right after end-of-sequence behaves like a BLA for output */
    if (s->nal_unit_type == NAL_CRA_NUT && s->last_eos == 1)
        sh->no_output_of_prior_pics_flag = 1;
    sh->pps_id = get_ue_golomb_long(gb);
    if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
        av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
        return AVERROR_INVALIDDATA;
    /* the PPS may only change on the first slice of a picture */
    if (!sh->first_slice_in_pic_flag &&
        s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
        av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
        return AVERROR_INVALIDDATA;
    s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
    if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
        const HEVCSPS* last_sps = s->sps;
        s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
        if (is_sps_exist(s, last_sps)) {
            /* keep prior pics only if geometry and DPB size are unchanged */
            if (s->sps->width != last_sps->width || s->sps->height != last_sps->height ||
                s->sps->temporal_layer[s->sps->max_sub_layers - 1].max_dec_pic_buffering != last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
                sh->no_output_of_prior_pics_flag = 0;
            sh->no_output_of_prior_pics_flag = 0;
        ff_hevc_clear_refs(s);
        ret = set_sps(s, s->sps);
        s->seq_decode = (s->seq_decode + 1) & 0xff;
    s->avctx->profile = s->sps->ptl.general_ptl.profile_idc;
    s->avctx->level   = s->sps->ptl.general_ptl.level_idc;
    sh->dependent_slice_segment_flag = 0;
    if (!sh->first_slice_in_pic_flag) {
        int slice_address_length;
        if (s->pps->dependent_slice_segments_enabled_flag)
            sh->dependent_slice_segment_flag = get_bits1(gb);
        /* address coded with ceil(log2(PicSizeInCtbsY)) bits */
        slice_address_length = av_ceil_log2(s->sps->ctb_width *
        sh->slice_segment_addr = get_bits(gb, slice_address_length);
        if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid slice segment address: %u.\n",
                   sh->slice_segment_addr);
            return AVERROR_INVALIDDATA;
        if (!sh->dependent_slice_segment_flag) {
            sh->slice_addr = sh->slice_segment_addr;
        /* first slice in picture: addresses are implicitly 0 */
        sh->slice_segment_addr = sh->slice_addr = 0;
        s->slice_initialized = 0;
    if (!sh->dependent_slice_segment_flag) {
        s->slice_initialized = 0;
        for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
            skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
        sh->slice_type = get_ue_golomb_long(gb);
        if (!(sh->slice_type == I_SLICE ||
              sh->slice_type == P_SLICE ||
              sh->slice_type == B_SLICE)) {
            av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
            return AVERROR_INVALIDDATA;
        if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
            av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
            return AVERROR_INVALIDDATA;
        // when flag is not present, picture is inferred to be output
        sh->pic_output_flag = 1;
        if (s->pps->output_flag_present_flag)
            sh->pic_output_flag = get_bits1(gb);
        if (s->sps->separate_colour_plane_flag)
            sh->colour_plane_id = get_bits(gb, 2);
        /* POC + reference picture sets (not present for IDR) */
            int short_term_ref_pic_set_sps_flag, poc;
            sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
            poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
            if (!sh->first_slice_in_pic_flag && poc != s->poc) {
                av_log(s->avctx, AV_LOG_WARNING,
                       "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
                if (s->avctx->err_recognition & AV_EF_EXPLODE)
                    return AVERROR_INVALIDDATA;
            short_term_ref_pic_set_sps_flag = get_bits1(gb);
            if (!short_term_ref_pic_set_sps_flag) {
                /* RPS coded explicitly in this slice header */
                ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
                sh->short_term_rps = &sh->slice_rps;
                /* RPS selected by index from the SPS-signalled sets */
                int numbits, rps_idx;
                if (!s->sps->nb_st_rps) {
                    av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
                    return AVERROR_INVALIDDATA;
                numbits = av_ceil_log2(s->sps->nb_st_rps);
                rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
                sh->short_term_rps = &s->sps->st_rps[rps_idx];
            ret = decode_lt_rps(s, &sh->long_term_rps, gb);
                av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
                if (s->avctx->err_recognition & AV_EF_EXPLODE)
                    return AVERROR_INVALIDDATA;
            if (s->sps->sps_temporal_mvp_enabled_flag)
                sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
                sh->slice_temporal_mvp_enabled_flag = 0;
            /* IDR: no RPS in the bitstream */
            s->sh.short_term_rps = NULL;
        /* sub-layer non-reference NAL types do not bump the temporal state */
        if (s->temporal_id == 0 &&
            s->nal_unit_type != NAL_TRAIL_N &&
            s->nal_unit_type != NAL_TSA_N   &&
            s->nal_unit_type != NAL_STSA_N  &&
            s->nal_unit_type != NAL_RADL_N  &&
            s->nal_unit_type != NAL_RADL_R  &&
            s->nal_unit_type != NAL_RASL_N  &&
            s->nal_unit_type != NAL_RASL_R)
        if (s->sps->sao_enabled) {
            sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
            /* one coded flag covers both chroma planes */
            sh->slice_sample_adaptive_offset_flag[1] =
            sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
            sh->slice_sample_adaptive_offset_flag[0] = 0;
            sh->slice_sample_adaptive_offset_flag[1] = 0;
            sh->slice_sample_adaptive_offset_flag[2] = 0;
        sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
        if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
            /* defaults from the PPS unless overridden below */
            sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
            if (sh->slice_type == B_SLICE)
                sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
            if (get_bits1(gb)) { // num_ref_idx_active_override_flag
                sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
                if (sh->slice_type == B_SLICE)
                    sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
            if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
                av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
                       sh->nb_refs[L0], sh->nb_refs[L1]);
                return AVERROR_INVALIDDATA;
            sh->rpl_modification_flag[0] = 0;
            sh->rpl_modification_flag[1] = 0;
            nb_refs = ff_hevc_frame_nb_refs(s);
                av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
                return AVERROR_INVALIDDATA;
            /* optional explicit reference-list reordering */
            if (s->pps->lists_modification_present_flag && nb_refs > 1) {
                sh->rpl_modification_flag[0] = get_bits1(gb);
                if (sh->rpl_modification_flag[0]) {
                    for (i = 0; i < sh->nb_refs[L0]; i++)
                        sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
                if (sh->slice_type == B_SLICE) {
                    sh->rpl_modification_flag[1] = get_bits1(gb);
                    if (sh->rpl_modification_flag[1] == 1)
                        for (i = 0; i < sh->nb_refs[L1]; i++)
                            sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
            if (sh->slice_type == B_SLICE)
                sh->mvd_l1_zero_flag = get_bits1(gb);
            if (s->pps->cabac_init_present_flag)
                sh->cabac_init_flag = get_bits1(gb);
                sh->cabac_init_flag = 0;
            sh->collocated_ref_idx = 0;
            if (sh->slice_temporal_mvp_enabled_flag) {
                sh->collocated_list = L0;
                if (sh->slice_type == B_SLICE)
                    /* coded flag is collocated_from_l0; invert for list index */
                    sh->collocated_list = !get_bits1(gb);
                if (sh->nb_refs[sh->collocated_list] > 1) {
                    sh->collocated_ref_idx = get_ue_golomb_long(gb);
                    if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
                        av_log(s->avctx, AV_LOG_ERROR,
                               "Invalid collocated_ref_idx: %d.\n",
                               sh->collocated_ref_idx);
                        return AVERROR_INVALIDDATA;
            if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
                (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
                pred_weight_table(s, gb);
            /* coded as 5 - max_num_merge_cand */
            sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
            if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
                av_log(s->avctx, AV_LOG_ERROR,
                       "Invalid number of merging MVP candidates: %d.\n",
                       sh->max_num_merge_cand);
                return AVERROR_INVALIDDATA;
        sh->slice_qp_delta = get_se_golomb(gb);
        if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
            sh->slice_cb_qp_offset = get_se_golomb(gb);
            sh->slice_cr_qp_offset = get_se_golomb(gb);
            sh->slice_cb_qp_offset = 0;
            sh->slice_cr_qp_offset = 0;
        if (s->pps->chroma_qp_offset_list_enabled_flag)
            sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
            sh->cu_chroma_qp_offset_enabled_flag = 0;
        if (s->pps->deblocking_filter_control_present_flag) {
            int deblocking_filter_override_flag = 0;
            if (s->pps->deblocking_filter_override_enabled_flag)
                deblocking_filter_override_flag = get_bits1(gb);
            if (deblocking_filter_override_flag) {
                sh->disable_deblocking_filter_flag = get_bits1(gb);
                if (!sh->disable_deblocking_filter_flag) {
                    /* offsets are coded halved; restore the full value */
                    sh->beta_offset = get_se_golomb(gb) * 2;
                    sh->tc_offset   = get_se_golomb(gb) * 2;
                sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
                sh->beta_offset = s->pps->beta_offset;
                sh->tc_offset   = s->pps->tc_offset;
            sh->disable_deblocking_filter_flag = 0;
        if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
            (sh->slice_sample_adaptive_offset_flag[0] ||
             sh->slice_sample_adaptive_offset_flag[1] ||
             !sh->disable_deblocking_filter_flag)) {
            sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
            sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
    } else if (!s->slice_initialized) {
        /* dependent slice segment cannot be the first we decode */
        av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
        return AVERROR_INVALIDDATA;
    sh->num_entry_point_offsets = 0;
    if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
        sh->num_entry_point_offsets = get_ue_golomb_long(gb);
        if (sh->num_entry_point_offsets > 0) {
            int offset_len = get_ue_golomb_long(gb) + 1;
            /* read each offset in 16-bit chunks plus a remainder */
            int segments = offset_len >> 4;
            int rest = (offset_len & 15);
            av_freep(&sh->entry_point_offset);
            av_freep(&sh->offset);
            sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
            sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
            sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
            if (!sh->entry_point_offset || !sh->offset || !sh->size) {
                sh->num_entry_point_offsets = 0;
                av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
                return AVERROR(ENOMEM);
            for (i = 0; i < sh->num_entry_point_offsets; i++) {
                for (j = 0; j < segments; j++) {
                    val += get_bits(gb, 16);
                    val += get_bits(gb, rest);
                sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
            if (s->threads_number > 1 && (s->pps->num_tile_rows > 1 || s->pps->num_tile_columns > 1)) {
                s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
                s->threads_number = 1;
                s->enable_parallel_tiles = 0;
            s->enable_parallel_tiles = 0;
    if (s->pps->slice_header_extension_present_flag) {
        unsigned int length = get_ue_golomb_long(gb);
        /* 64-bit product avoids overflow for absurd coded lengths */
        if (length*8LL > get_bits_left(gb)) {
            av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
            return AVERROR_INVALIDDATA;
        for (i = 0; i < length; i++)
            skip_bits(gb, 8);  // slice_header_extension_data_byte
    // Inferred parameters
    sh->slice_qp = 26U + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
    if (sh->slice_qp > 51 ||
        sh->slice_qp < -s->sps->qp_bd_offset) {
        av_log(s->avctx, AV_LOG_ERROR,
               "The slice_qp %d is outside the valid range "
               -s->sps->qp_bd_offset);
        return AVERROR_INVALIDDATA;
    sh->slice_ctb_addr_rs = sh->slice_segment_addr;
    if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
        av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
        return AVERROR_INVALIDDATA;
    s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
    if (!s->pps->cu_qp_delta_enabled_flag)
        s->HEVClc->qp_y = s->sh.slice_qp;
    s->slice_initialized = 1;
    s->HEVClc->tu.cu_qp_offset_cb = 0;
    s->HEVClc->tu.cu_qp_offset_cr = 0;
/* Address the entry for CTB (x, y) in a per-CTB table laid out in raster
 * order (row stride = ctb_width). Relies on `s` being in scope. */
#define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])

/* Assign one SAO parameter for the current CTB, inheriting it from the left
 * or upper neighbour when the corresponding sao_merge flag is set. Relies on
 * `sao`, `rx`, `ry` and the merge flags being in scope at the use site.
 * NOTE(review): this extract appears to be missing part of the macro body
 * (the do/while wrapper and the non-merge `sao->elem = value;` branch); the
 * visible lines are kept verbatim. */
#define SET_SAO(elem, value)                            \
    if (!sao_merge_up_flag && !sao_merge_left_flag)     \
    else if (sao_merge_left_flag)                       \
        sao->elem = CTB(s->sao, rx-1, ry).elem;         \
    else if (sao_merge_up_flag)                         \
        sao->elem = CTB(s->sao, rx, ry-1).elem;         \
/* Parse the SAO parameters for the CTB at raster position (rx, ry),
 * handling the merge-left/merge-up flags and deriving the final offset
 * values (spec 7.3.8.3).
 * NOTE(review): braces/else lines and some loop bodies are elided in this
 * extract; comments describe the visible statements only. */
static void hls_sao_param(HEVCContext *s, int rx, int ry)
    HEVCLocalContext *lc    = s->HEVClc;
    int sao_merge_left_flag = 0;
    int sao_merge_up_flag   = 0;
    SAOParams *sao          = &CTB(s->sao, rx, ry);
    if (s->sh.slice_sample_adaptive_offset_flag[0] ||
        s->sh.slice_sample_adaptive_offset_flag[1]) {
        /* merge flags only valid when a decodable neighbour exists */
        if (lc->ctb_left_flag)
            sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
        if (ry > 0 && !sao_merge_left_flag) {
            sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
    for (c_idx = 0; c_idx < 3; c_idx++) {
        int log2_sao_offset_scale = c_idx == 0 ? s->pps->log2_sao_offset_scale_luma :
                                                 s->pps->log2_sao_offset_scale_chroma;
        if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
            sao->type_idx[c_idx] = SAO_NOT_APPLIED;
        /* Cr (c_idx == 2) shares type/class with Cb */
        sao->type_idx[2] = sao->type_idx[1];
        sao->eo_class[2] = sao->eo_class[1];
        SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
        if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
        for (i = 0; i < 4; i++)
            SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
        if (sao->type_idx[c_idx] == SAO_BAND) {
            /* band offsets carry an explicit sign only when non-zero */
            for (i = 0; i < 4; i++) {
                if (sao->offset_abs[c_idx][i]) {
                    SET_SAO(offset_sign[c_idx][i],
                            ff_hevc_sao_offset_sign_decode(s));
                    sao->offset_sign[c_idx][i] = 0;
            SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
        } else if (c_idx != 2) {
            SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
        // Inferred parameters
        sao->offset_val[c_idx][0] = 0;
        for (i = 0; i < 4; i++) {
            sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
            if (sao->type_idx[c_idx] == SAO_EDGE) {
                /* edge offsets for categories 3/4 are implicitly negative */
                sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
            } else if (sao->offset_sign[c_idx][i]) {
                sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
            sao->offset_val[c_idx][i + 1] <<= log2_sao_offset_scale;
/* Parse the cross-component prediction (CCP) syntax for chroma component
 * `idx` and derive lc->tu.res_scale_val, the signed residual scale factor
 * (0 disables CCP for this TU/component). */
static int hls_cross_component_pred(HEVCContext *s, int idx) {
    HEVCLocalContext *lc    = s->HEVClc;
    int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
    if (log2_res_scale_abs_plus1 !=  0) {
        int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
        /* magnitude 1 << (abs_plus1 - 1), sign from the coded flag */
        lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
                               (1 - 2 * res_scale_sign_flag);
        /* NOTE(review): the `} else {` line appears elided in this extract;
         * this assignment is the else-branch (CCP disabled). */
        lc->tu.res_scale_val = 0;
/* Decode one transform unit: intra prediction for the covered blocks, CU QP
 * delta / chroma QP offset parsing, scan-index selection and residual coding
 * for luma and chroma (including 4:2:2 second chroma blocks and
 * cross-component prediction).
 * (x0, y0): this TU; (xBase, yBase): parent TU for the blk_idx==3 chroma
 * case; (cb_xBase, cb_yBase)/log2_cb_size: enclosing CB for QP derivation.
 * NOTE(review): braces/else lines are elided in this extract; comments
 * describe the visible statements only. */
static int hls_transform_unit(HEVCContext *s, int x0, int y0,
                              int xBase, int yBase, int cb_xBase, int cb_yBase,
                              int log2_cb_size, int log2_trafo_size,
                              int trafo_depth, int blk_idx)
    HEVCLocalContext *lc = s->HEVClc;
    /* chroma TU size; hshift[1] is 0 for 4:4:4, 1 otherwise */
    const int log2_trafo_size_c = log2_trafo_size - s->sps->hshift[1];
    if (lc->cu.pred_mode == MODE_INTRA) {
        int trafo_size = 1 << log2_trafo_size;
        ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
        s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
    /* any coded residual in luma or chroma (4:2:2 has two chroma blocks
     * stacked vertically, hence the second-sample checks) */
    if (lc->tt.cbf_luma ||
        SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
        SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) ||
        (s->sps->chroma_format_idc == 2 &&
         (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (1 << log2_trafo_size_c)) ||
          SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (1 << log2_trafo_size_c))))) {
        int scan_idx   = SCAN_DIAG;
        int scan_idx_c = SCAN_DIAG;
        int cbf_luma = lc->tt.cbf_luma;
        int cbf_chroma = SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
                         SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) ||
                         (s->sps->chroma_format_idc == 2 &&
                          (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (1 << log2_trafo_size_c)) ||
                           SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (1 << log2_trafo_size_c))));
        /* cu_qp_delta is coded at most once per quantization group */
        if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
            lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
            if (lc->tu.cu_qp_delta != 0)
                if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
                    lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
            lc->tu.is_cu_qp_delta_coded = 1;
            if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
                lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
                av_log(s->avctx, AV_LOG_ERROR,
                       "The cu_qp_delta %d is outside the valid range "
                       -(26 + s->sps->qp_bd_offset / 2),
                        (25 + s->sps->qp_bd_offset / 2));
                return AVERROR_INVALIDDATA;
            ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
        /* per-CU chroma QP offset, coded once per CU when enabled */
        if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
            !lc->cu.cu_transquant_bypass_flag && !lc->tu.is_cu_chroma_qp_offset_coded) {
            int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
            if (cu_chroma_qp_offset_flag) {
                int cu_chroma_qp_offset_idx = 0;
                if (s->pps->chroma_qp_offset_list_len_minus1 > 0) {
                    cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
                    av_log(s->avctx, AV_LOG_ERROR,
                           "cu_chroma_qp_offset_idx not yet tested.\n");
                lc->tu.cu_qp_offset_cb = s->pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
                lc->tu.cu_qp_offset_cr = s->pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
                lc->tu.cu_qp_offset_cb = 0;
                lc->tu.cu_qp_offset_cr = 0;
            lc->tu.is_cu_chroma_qp_offset_coded = 1;
        /* mode-dependent scan order: only for intra TUs smaller than 16x16 */
        if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
            if (lc->tu.intra_pred_mode >= 6 &&
                lc->tu.intra_pred_mode <= 14) {
                scan_idx = SCAN_VERT;
            } else if (lc->tu.intra_pred_mode >= 22 &&
                       lc->tu.intra_pred_mode <= 30) {
                scan_idx = SCAN_HORIZ;
            if (lc->tu.intra_pred_mode_c >=  6 &&
                lc->tu.intra_pred_mode_c <= 14) {
                scan_idx_c = SCAN_VERT;
            } else if (lc->tu.intra_pred_mode_c >= 22 &&
                       lc->tu.intra_pred_mode_c <= 30) {
                scan_idx_c = SCAN_HORIZ;
        ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
        /* chroma is coded at this level unless the TU is 4x4 in subsampled
         * formats (then it is deferred to blk_idx == 3 on the parent) */
        if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
            int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
            int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
            lc->tu.cross_pf  = (s->pps->cross_component_prediction_enabled_flag && cbf_luma &&
                                (lc->cu.pred_mode == MODE_INTER ||
                                 (lc->tu.chroma_mode_c ==  4)));
            if (lc->tu.cross_pf) {
                hls_cross_component_pred(s, 0);
            /* Cb: one block, or two stacked blocks for 4:2:2 */
            for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
                if (lc->cu.pred_mode == MODE_INTRA) {
                    ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
                    s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
                if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (i << log2_trafo_size_c)))
                    ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
                                                log2_trafo_size_c, scan_idx_c, 1);
            if (lc->tu.cross_pf) {
                /* add the scaled luma residual to the Cb plane (CCP) */
                ptrdiff_t stride = s->frame->linesize[1];
                int hshift = s->sps->hshift[1];
                int vshift = s->sps->vshift[1];
                int16_t *coeffs_y = lc->tu.coeffs[0];
                int16_t *coeffs   = lc->tu.coeffs[1];
                int size = 1 << log2_trafo_size_c;
                uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
                                                  ((x0 >> hshift) << s->sps->pixel_shift)];
                for (i = 0; i < (size * size); i++) {
                    coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
                s->hevcdsp.transform_add[log2_trafo_size-2](dst, coeffs, stride);
            if (lc->tu.cross_pf) {
                hls_cross_component_pred(s, 1);
            /* Cr: same structure as Cb above */
            for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
                if (lc->cu.pred_mode == MODE_INTRA) {
                    ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
                    s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
                if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (i << log2_trafo_size_c)))
                    ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
                                                log2_trafo_size_c, scan_idx_c, 2);
            if (lc->tu.cross_pf) {
                ptrdiff_t stride = s->frame->linesize[2];
                int hshift = s->sps->hshift[2];
                int vshift = s->sps->vshift[2];
                int16_t *coeffs_y = lc->tu.coeffs[0];
                int16_t *coeffs   = lc->tu.coeffs[1];
                int size = 1 << log2_trafo_size_c;
                uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
                                                  ((x0 >> hshift) << s->sps->pixel_shift)];
                for (i = 0; i < (size * size); i++) {
                    coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
                s->hevcdsp.transform_add[log2_trafo_size-2](dst, coeffs, stride);
        } else if (blk_idx == 3) {
            /* 4x4 TUs in subsampled formats: chroma for all four children is
             * coded once, at the parent position (xBase, yBase) */
            int trafo_size_h = 1 << (log2_trafo_size + 1);
            int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
            for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
                if (lc->cu.pred_mode == MODE_INTRA) {
                    ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
                                                    trafo_size_h, trafo_size_v);
                    s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
                if (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], xBase, yBase + (i << log2_trafo_size_c)))
                    ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
                                                log2_trafo_size, scan_idx_c, 1);
            for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
                if (lc->cu.pred_mode == MODE_INTRA) {
                    ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
                                                    trafo_size_h, trafo_size_v);
                    s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
                if (SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], xBase, yBase + (i << log2_trafo_size_c)))
                    ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
                                                log2_trafo_size, scan_idx_c, 2);
    } else if (lc->cu.pred_mode == MODE_INTRA) {
        /* no residual at all: still run chroma intra prediction */
        if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
            int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
            int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
            ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
            s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
            s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
            if (s->sps->chroma_format_idc == 2) {
                ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
                                                trafo_size_h, trafo_size_v);
                s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
                s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
        } else if (blk_idx == 3) {
            int trafo_size_h = 1 << (log2_trafo_size + 1);
            int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
            ff_hevc_set_neighbour_available(s, xBase, yBase,
                                            trafo_size_h, trafo_size_v);
            s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
            s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
            if (s->sps->chroma_format_idc == 2) {
                ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
                                                trafo_size_h, trafo_size_v);
                s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
                s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
1085 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1087 int cb_size = 1 << log2_cb_size;
1088 int log2_min_pu_size = s->sps->log2_min_pu_size;
1090 int min_pu_width = s->sps->min_pu_width;
1091 int x_end = FFMIN(x0 + cb_size, s->sps->width);
1092 int y_end = FFMIN(y0 + cb_size, s->sps->height);
1095 for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1096 for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1097 s->is_pcm[i + j * min_pu_width] = 2;
/*
 * hls_transform_tree(): recursively parse one transform tree (HEVC spec
 * 7.3.8.8, transform_tree syntax). Decodes the split flag and the chroma
 * coded-block flags at each level, recurses into the four children on a
 * split, and otherwise decodes cbf_luma and the leaf transform unit.
 *
 * NOTE(review): this text is a degraded extraction — the embedded original
 * line numbers are non-contiguous, so brace-only, blank, `int ret; int i, j;`
 * and `if (ret < 0) return ret;` style lines appear to have been dropped.
 * Comments below describe only what the visible code shows.
 */
1100 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1101 int xBase, int yBase, int cb_xBase, int cb_yBase,
1102 int log2_cb_size, int log2_trafo_size,
1103 int trafo_depth, int blk_idx)
1105 HEVCLocalContext *lc = s->HEVClc;
1106 uint8_t split_transform_flag;
/* 4x4 chroma case: chroma was not split at this depth, so inherit the
 * parent's chroma CBFs (second field handled separately for 4:2:2). */
1109 if (trafo_depth > 0 && log2_trafo_size == 2) {
1110 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1111 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase);
1112 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1113 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase);
1114 if (s->sps->chroma_format_idc == 2) {
1115 int xBase_cb = xBase & ((1 << log2_trafo_size) - 1);
1116 int yBase_cb = yBase & ((1 << log2_trafo_size) - 1);
1117 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1))) =
1118 SAMPLE_CBF2(lc->tt.cbf_cb[trafo_depth - 1], xBase_cb, yBase_cb + (1 << (log2_trafo_size)));
1119 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1))) =
1120 SAMPLE_CBF2(lc->tt.cbf_cr[trafo_depth - 1], xBase_cb, yBase_cb + (1 << (log2_trafo_size)));
/* else branch (dropped `} else {` line): clear the chroma CBFs. */
1123 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1124 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) = 0;
1125 if (s->sps->chroma_format_idc == 2) {
1126 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1))) =
1127 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1))) = 0;
/* Select the intra prediction modes this TU will use: per-blk_idx modes
 * for NxN intra split at depth 1, otherwise the modes of partition 0. */
1131 if (lc->cu.intra_split_flag) {
1132 if (trafo_depth == 1) {
1133 lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1134 if (s->sps->chroma_format_idc == 3) {
1135 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1136 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[blk_idx];
1138 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1139 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0];
1143 lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[0];
1144 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1145 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0];
1148 lc->tt.cbf_luma = 1;
/* inter_split_flag forces a split for non-2Nx2N inter CUs when the inter
 * transform hierarchy depth is 0 (condition continues on a dropped line). */
1150 lc->tt.inter_split_flag = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1151 lc->cu.pred_mode == MODE_INTER &&
1152 lc->cu.part_mode != PART_2Nx2N &&
/* split_transform_flag is explicitly coded only when both a split and a
 * no-split are legal at this size/depth; otherwise it is inferred. */
1155 if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1156 log2_trafo_size > s->sps->log2_min_tb_size &&
1157 trafo_depth < lc->cu.max_trafo_depth &&
1158 !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1159 split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1161 split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1162 (lc->cu.intra_split_flag && trafo_depth == 0) ||
1163 lc->tt.inter_split_flag;
/* Decode cbf_cb / cbf_cr for this level (only present when the parent's
 * corresponding CBF was set); 4:2:2 carries a second CBF per component. */
1166 if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
1167 if (trafo_depth == 0 ||
1168 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth - 1], xBase, yBase)) {
1169 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) =
1170 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1171 if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1172 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1))) =
1173 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1177 if (trafo_depth == 0 ||
1178 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth - 1], xBase, yBase)) {
1179 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) =
1180 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1181 if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1182 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1))) =
1183 ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
/* Split: recurse into the four quadrant children at half the size.
 * NOTE(review): the `if (ret < 0) return ret;` checks between the calls
 * appear on dropped lines. */
1188 if (split_transform_flag) {
1189 int x1 = x0 + ((1 << log2_trafo_size) >> 1);
1190 int y1 = y0 + ((1 << log2_trafo_size) >> 1);
1192 ret = hls_transform_tree(s, x0, y0, x0, y0, cb_xBase, cb_yBase,
1193 log2_cb_size, log2_trafo_size - 1,
1194 trafo_depth + 1, 0);
1197 ret = hls_transform_tree(s, x1, y0, x0, y0, cb_xBase, cb_yBase,
1198 log2_cb_size, log2_trafo_size - 1,
1199 trafo_depth + 1, 1);
1202 ret = hls_transform_tree(s, x0, y1, x0, y0, cb_xBase, cb_yBase,
1203 log2_cb_size, log2_trafo_size - 1,
1204 trafo_depth + 1, 2);
1207 ret = hls_transform_tree(s, x1, y1, x0, y0, cb_xBase, cb_yBase,
1208 log2_cb_size, log2_trafo_size - 1,
1209 trafo_depth + 1, 3);
/* Leaf: decode cbf_luma where required, then the transform unit itself. */
1213 int min_tu_size = 1 << s->sps->log2_min_tb_size;
1214 int log2_min_tu_size = s->sps->log2_min_tb_size;
1215 int min_tu_width = s->sps->min_tb_width;
1217 if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1218 SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0) ||
1219 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0) ||
1220 (s->sps->chroma_format_idc == 2 &&
1221 (SAMPLE_CBF(lc->tt.cbf_cb[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1))) ||
1222 SAMPLE_CBF(lc->tt.cbf_cr[trafo_depth], x0, y0 + (1 << (log2_trafo_size - 1)))))) {
1223 lc->tt.cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1226 ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1227 log2_cb_size, log2_trafo_size, trafo_depth,
/* Record which min-TUs have luma coefficients, for later filtering. */
1231 // TODO: store cbf_luma somewhere else
1232 if (lc->tt.cbf_luma) {
1234 for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1235 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1236 int x_tu = (x0 + j) >> log2_min_tu_size;
1237 int y_tu = (y0 + i) >> log2_min_tu_size;
1238 s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
/* Compute deblocking boundary strengths for this TU; transquant-bypass
 * blocks are additionally flagged so deblocking can skip them. */
1241 if (!s->sh.disable_deblocking_filter_flag) {
1242 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1243 if (s->pps->transquant_bypass_enable_flag &&
1244 lc->cu.cu_transquant_bypass_flag)
1245 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
/*
 * hls_pcm_sample(): decode one PCM (raw, uncompressed) coding block.
 * Computes the bit length of the luma + chroma PCM payload, skips past it
 * in the CABAC byte stream, and copies the samples straight into the frame.
 * NOTE(review): degraded extraction — the declarations of `gb`
 * (GetBitContext) and `ret`, the `if (ret < 0) return ret;` check and the
 * final `return 0;` appear on dropped lines.
 */
1251 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1253 //TODO: non-4:2:0 support
1254 HEVCLocalContext *lc = s->HEVClc;
/* Destination pointers into the three frame planes at (x0, y0), taking
 * chroma subsampling (hshift/vshift) and sample size (pixel_shift) into
 * account. */
1256 int cb_size = 1 << log2_cb_size;
1257 int stride0 = s->frame->linesize[0];
1258 uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1259 int stride1 = s->frame->linesize[1];
1260 uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1261 int stride2 = s->frame->linesize[2];
1262 uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
/* Total payload size in bits: luma samples at pcm.bit_depth plus both
 * chroma planes at pcm.bit_depth_chroma. */
1264 int length = cb_size * cb_size * s->sps->pcm.bit_depth +
1265 (((cb_size >> s->sps->hshift[1]) * (cb_size >> s->sps->vshift[1])) +
1266 ((cb_size >> s->sps->hshift[2]) * (cb_size >> s->sps->vshift[2]))) *
1267 s->sps->pcm.bit_depth_chroma;
/* Advance the CABAC engine past the byte-aligned PCM payload. */
1268 const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1271 if (!s->sh.disable_deblocking_filter_flag)
1272 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
/* Read the raw samples back out of the skipped bytes and write them into
 * the frame, plane by plane. */
1274 ret = init_get_bits(&gb, pcm, length);
1278 s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size, &gb, s->sps->pcm.bit_depth);
1279 s->hevcdsp.put_pcm(dst1, stride1,
1280 cb_size >> s->sps->hshift[1],
1281 cb_size >> s->sps->vshift[1],
1282 &gb, s->sps->pcm.bit_depth_chroma);
1283 s->hevcdsp.put_pcm(dst2, stride2,
1284 cb_size >> s->sps->hshift[2],
1285 cb_size >> s->sps->vshift[2],
1286 &gb, s->sps->pcm.bit_depth_chroma);
/**
1291 * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1293 * @param s HEVC decoding context
1294 * @param dst target buffer for block data at block position
1295 * @param dststride stride of the dst buffer
1296 * @param ref reference picture buffer at origin (0, 0)
1297 * @param mv motion vector (relative to block position) to get pixel data from
1298 * @param x_off horizontal position of block from origin (0, 0)
1299 * @param y_off vertical position of block from origin (0, 0)
1300 * @param block_w width of block
1301 * @param block_h height of block
1302 * @param luma_weight weighting factor applied to the luma prediction
1303 * @param luma_offset additive offset applied to the luma prediction value
 */
1306 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1307 AVFrame *ref, const Mv *mv, int x_off, int y_off,
1308 int block_w, int block_h, int luma_weight, int luma_offset)
1310 HEVCLocalContext *lc = s->HEVClc;
1311 uint8_t *src = ref->data[0];
1312 ptrdiff_t srcstride = ref->linesize[0];
1313 int pic_width = s->sps->width;
1314 int pic_height = s->sps->height;
/* NOTE(review): the declarations of mx/my (quarter-pel fraction of the MV)
 * appear on dropped lines of this extraction. */
1317 int weight_flag = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1318 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1319 int idx = ff_hevc_pel_weight[block_w];
/* Integer part of the motion vector (quarter-pel units, hence >> 2). */
1321 x_off += mv->x >> 2;
1322 y_off += mv->y >> 2;
1323 src += y_off * srcstride + (x_off << s->sps->pixel_shift);
/* If interpolation would read outside the picture, replicate the edges
 * into edge_emu_buffer and source from there instead. */
1325 if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1326 x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1327 y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1328 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1329 int offset = QPEL_EXTRA_BEFORE * srcstride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1330 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1332 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1333 edge_emu_stride, srcstride,
1334 block_w + QPEL_EXTRA,
1335 block_h + QPEL_EXTRA,
1336 x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1337 pic_width, pic_height);
1338 src = lc->edge_emu_buffer + buf_offset;
1339 srcstride = edge_emu_stride;
/* Unweighted vs weighted qpel interpolation.
 * NOTE(review): the `if (!weight_flag)` / `else` lines between the two
 * calls appear to have been dropped from this extraction. */
1343 s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1344 block_h, mx, my, block_w);
1346 s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1347 block_h, s->sh.luma_log2_weight_denom,
1348 luma_weight, luma_offset, mx, my, block_w);
/**
1352 * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1354 * @param s HEVC decoding context
1355 * @param dst target buffer for block data at block position
1356 * @param dststride stride of the dst buffer
1357 * @param ref0 reference picture0 buffer at origin (0, 0)
1358 * @param mv0 motion vector0 (relative to block position) to get pixel data from
1359 * @param x_off horizontal position of block from origin (0, 0)
1360 * @param y_off vertical position of block from origin (0, 0)
1361 * @param block_w width of block
1362 * @param block_h height of block
1363 * @param ref1 reference picture1 buffer at origin (0, 0)
1364 * @param mv1 motion vector1 (relative to block position) to get pixel data from
1365 * @param current_mv current motion vector structure
 */
1367 static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1368 AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1369 int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1371 HEVCLocalContext *lc = s->HEVClc;
/* Intermediate buffer for the L0 prediction; L1 is combined with it by the
 * *_bi DSP function below. */
1372 DECLARE_ALIGNED(16, int16_t, tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
1373 ptrdiff_t src0stride = ref0->linesize[0];
1374 ptrdiff_t src1stride = ref1->linesize[0];
1375 int pic_width = s->sps->width;
1376 int pic_height = s->sps->height;
/* Quarter-pel fractional parts of both MVs. */
1377 int mx0 = mv0->x & 3;
1378 int my0 = mv0->y & 3;
1379 int mx1 = mv1->x & 3;
1380 int my1 = mv1->y & 3;
1381 int weight_flag = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1382 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
/* Integer parts of both MVs added to the block position. */
1383 int x_off0 = x_off + (mv0->x >> 2);
1384 int y_off0 = y_off + (mv0->y >> 2);
1385 int x_off1 = x_off + (mv1->x >> 2);
1386 int y_off1 = y_off + (mv1->y >> 2);
1387 int idx = ff_hevc_pel_weight[block_w];
1389 uint8_t *src0 = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
1390 uint8_t *src1 = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
/* Edge emulation for the L0 reference, as in luma_mc_uni(). */
1392 if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1393 x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1394 y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1395 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1396 int offset = QPEL_EXTRA_BEFORE * src0stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1397 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1399 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1400 edge_emu_stride, src0stride,
1401 block_w + QPEL_EXTRA,
1402 block_h + QPEL_EXTRA,
1403 x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1404 pic_width, pic_height);
1405 src0 = lc->edge_emu_buffer + buf_offset;
1406 src0stride = edge_emu_stride;
/* Edge emulation for the L1 reference (separate buffer, edge_emu_buffer2). */
1409 if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1410 x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1411 y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1412 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1413 int offset = QPEL_EXTRA_BEFORE * src1stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1414 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1416 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1417 edge_emu_stride, src1stride,
1418 block_w + QPEL_EXTRA,
1419 block_h + QPEL_EXTRA,
1420 x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1421 pic_width, pic_height);
1422 src1 = lc->edge_emu_buffer2 + buf_offset;
1423 src1stride = edge_emu_stride;
/* L0 prediction into tmp, then combine with L1 (plain or weighted bi-pred).
 * NOTE(review): the `if (!weight_flag)` / `else` lines appear dropped. */
1426 s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](tmp, MAX_PB_SIZE, src0, src0stride,
1427 block_h, mx0, my0, block_w);
1429 s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, tmp, MAX_PB_SIZE,
1430 block_h, mx1, my1, block_w);
1432 s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, tmp, MAX_PB_SIZE,
1433 block_h, s->sh.luma_log2_weight_denom,
1434 s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1435 s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1436 s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1437 s->sh.luma_offset_l1[current_mv->ref_idx[1]],
/**
1443 * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1445 * @param s HEVC decoding context
1446 * @param dst1 target buffer for block data at block position (U plane)
1447 * @param dst2 target buffer for block data at block position (V plane)
1448 * @param dststride stride of the dst1 and dst2 buffers
1449 * @param ref reference picture buffer at origin (0, 0)
1450 * @param mv motion vector (relative to block position) to get pixel data from
1451 * @param x_off horizontal position of block from origin (0, 0)
1452 * @param y_off vertical position of block from origin (0, 0)
1453 * @param block_w width of block
1454 * @param block_h height of block
1455 * @param chroma_weight weighting factor applied to the chroma prediction
1456 * @param chroma_offset additive offset applied to the chroma prediction value
 */
1459 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1460 ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1461 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1463 HEVCLocalContext *lc = s->HEVClc;
/* Picture dimensions in chroma samples. */
1464 int pic_width = s->sps->width >> s->sps->hshift[1];
1465 int pic_height = s->sps->height >> s->sps->vshift[1];
/* NOTE(review): "¤t_mv" below is HTML-entity-mangled "&current_mv"
 * ("&curren" was decoded to the '¤' sign) — the encoding needs repair. */
1466 const Mv *mv = ¤t_mv->mv[reflist];
1467 int weight_flag = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1468 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1469 int idx = ff_hevc_pel_weight[block_w];
1470 int hshift = s->sps->hshift[1];
1471 int vshift = s->sps->vshift[1];
/* Fractional MV parts in chroma units; scaled (_mx/_my) for the DSP call. */
1472 intptr_t mx = mv->x & ((1 << (2 + hshift)) - 1);
1473 intptr_t my = mv->y & ((1 << (2 + vshift)) - 1);
1474 intptr_t _mx = mx << (1 - hshift);
1475 intptr_t _my = my << (1 - vshift);
/* Integer MV parts, also in chroma units. */
1477 x_off += mv->x >> (2 + hshift);
1478 y_off += mv->y >> (2 + vshift);
1479 src0 += y_off * srcstride + (x_off << s->sps->pixel_shift);
/* Edge emulation when the epel filter would read outside the picture. */
1481 if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1482 x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1483 y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1484 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1485 int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->sps->pixel_shift));
1486 int buf_offset0 = EPEL_EXTRA_BEFORE *
1487 (edge_emu_stride + (1 << s->sps->pixel_shift));
1488 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1489 edge_emu_stride, srcstride,
1490 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1491 x_off - EPEL_EXTRA_BEFORE,
1492 y_off - EPEL_EXTRA_BEFORE,
1493 pic_width, pic_height);
1495 src0 = lc->edge_emu_buffer + buf_offset0;
1496 srcstride = edge_emu_stride;
/* Plain vs weighted epel interpolation.
 * NOTE(review): the `if (!weight_flag)` / `else` lines appear dropped. */
1499 s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1500 block_h, _mx, _my, block_w);
1502 s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1503 block_h, s->sh.chroma_log2_weight_denom,
1504 chroma_weight, chroma_offset, _mx, _my, block_w);
/**
1508 * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1510 * @param s HEVC decoding context
1511 * @param dst target buffer for block data at block position
1512 * @param dststride stride of the dst buffer
1513 * @param ref0 reference picture0 buffer at origin (0, 0)
1514 * @param mv0 motion vector0 (relative to block position) to get pixel data from
1515 * @param x_off horizontal position of block from origin (0, 0)
1516 * @param y_off vertical position of block from origin (0, 0)
1517 * @param block_w width of block
1518 * @param block_h height of block
1519 * @param ref1 reference picture1 buffer at origin (0, 0)
1520 * @param mv1 motion vector1 (relative to block position) to get pixel data from
1521 * @param current_mv current motion vector structure
1522 * @param cidx chroma component(cb, cr)
 */
1524 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1525 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
/* L0 prediction lands in tmp; the *_bi DSP call combines it with L1. */
1527 DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
1528 int tmpstride = MAX_PB_SIZE;
1529 HEVCLocalContext *lc = s->HEVClc;
1530 uint8_t *src1 = ref0->data[cidx+1];
1531 uint8_t *src2 = ref1->data[cidx+1];
1532 ptrdiff_t src1stride = ref0->linesize[cidx+1];
1533 ptrdiff_t src2stride = ref1->linesize[cidx+1];
1534 int weight_flag = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1535 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1536 int pic_width = s->sps->width >> s->sps->hshift[1];
1537 int pic_height = s->sps->height >> s->sps->vshift[1];
/* NOTE(review): "¤t_mv" is HTML-entity-mangled "&current_mv" — the
 * encoding needs repair here and below. */
1538 Mv *mv0 = ¤t_mv->mv[0];
1539 Mv *mv1 = ¤t_mv->mv[1];
1540 int hshift = s->sps->hshift[1];
1541 int vshift = s->sps->vshift[1];
/* Fractional MV parts per list, scaled for the epel DSP functions. */
1543 intptr_t mx0 = mv0->x & ((1 << (2 + hshift)) - 1);
1544 intptr_t my0 = mv0->y & ((1 << (2 + vshift)) - 1);
1545 intptr_t mx1 = mv1->x & ((1 << (2 + hshift)) - 1);
1546 intptr_t my1 = mv1->y & ((1 << (2 + vshift)) - 1);
1547 intptr_t _mx0 = mx0 << (1 - hshift);
1548 intptr_t _my0 = my0 << (1 - vshift);
1549 intptr_t _mx1 = mx1 << (1 - hshift);
1550 intptr_t _my1 = my1 << (1 - vshift);
/* Integer MV parts per list, in chroma sample units. */
1552 int x_off0 = x_off + (mv0->x >> (2 + hshift));
1553 int y_off0 = y_off + (mv0->y >> (2 + vshift));
1554 int x_off1 = x_off + (mv1->x >> (2 + hshift));
1555 int y_off1 = y_off + (mv1->y >> (2 + vshift));
1556 int idx = ff_hevc_pel_weight[block_w];
1557 src1 += y_off0 * src1stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
1558 src2 += y_off1 * src2stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
/* Edge emulation for the L0 reference. */
1560 if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1561 x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1562 y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1563 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1564 int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1565 int buf_offset1 = EPEL_EXTRA_BEFORE *
1566 (edge_emu_stride + (1 << s->sps->pixel_shift));
1568 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1569 edge_emu_stride, src1stride,
1570 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1571 x_off0 - EPEL_EXTRA_BEFORE,
1572 y_off0 - EPEL_EXTRA_BEFORE,
1573 pic_width, pic_height);
1575 src1 = lc->edge_emu_buffer + buf_offset1;
1576 src1stride = edge_emu_stride;
/* Edge emulation for the L1 reference (separate buffer). */
1579 if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1580 x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1581 y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1582 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1583 int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1584 int buf_offset1 = EPEL_EXTRA_BEFORE *
1585 (edge_emu_stride + (1 << s->sps->pixel_shift));
1587 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1588 edge_emu_stride, src2stride,
1589 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1590 x_off1 - EPEL_EXTRA_BEFORE,
1591 y_off1 - EPEL_EXTRA_BEFORE,
1592 pic_width, pic_height);
1594 src2 = lc->edge_emu_buffer2 + buf_offset1;
1595 src2stride = edge_emu_stride;
/* L0 into tmp, then plain or weighted bi-directional combination with L1.
 * NOTE(review): the `if (!weight_flag)` / `else` lines appear dropped. */
1598 s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](tmp, tmpstride, src1, src1stride,
1599 block_h, _mx0, _my0, block_w);
1601 s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1602 src2, src2stride, tmp, tmpstride,
1603 block_h, _mx1, _my1, block_w);
1605 s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1606 src2, src2stride, tmp, tmpstride,
1608 s->sh.chroma_log2_weight_denom,
1609 s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1610 s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1611 s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1612 s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1613 _mx1, _my1, block_w);
1616 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1617 const Mv *mv, int y0, int height)
1619 int y = (mv->y >> 2) + y0 + height + 9;
1621 if (s->threads_type == FF_THREAD_FRAME )
1622 ff_thread_await_progress(&ref->tf, y, 0);
/*
 * hls_prediction_unit(): parse one prediction unit and perform its inter
 * prediction — skip/merge or explicit MVD+MVP decoding, writing the
 * resulting MvField into tab_mvf, then uni- or bi-directional luma and
 * chroma motion compensation into the current frame.
 *
 * NOTE(review): degraded extraction — the parameter line declaring
 * nPbW/nPbH (original line 1626), the local declarations (merge_idx,
 * ref_idx[], mvp_flag[], x_pu/y_pu, i, j) and several braces/returns sit
 * on dropped lines. Every "¤t_mv" below is HTML-entity-mangled
 * "&current_mv" and needs encoding repair.
 */
1625 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1627 int log2_cb_size, int partIdx, int idx)
/* POS(): address of sample (x, y) in plane c_idx of the output frame. */
1629 #define POS(c_idx, x, y) \
1630 &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1631 (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1632 HEVCLocalContext *lc = s->HEVClc;
1634 struct MvField current_mv = {{{ 0 }}};
1636 int min_pu_width = s->sps->min_pu_width;
1638 MvField *tab_mvf = s->ref->tab_mvf;
1639 RefPicList *refPicList = s->ref->refPicList;
1640 HEVCFrame *ref0, *ref1;
1641 uint8_t *dst0 = POS(0, x0, y0);
1642 uint8_t *dst1 = POS(1, x0, y0);
1643 uint8_t *dst2 = POS(2, x0, y0);
1644 int log2_min_cb_size = s->sps->log2_min_cb_size;
1645 int min_cb_width = s->sps->min_cb_width;
1646 int x_cb = x0 >> log2_min_cb_size;
1647 int y_cb = y0 >> log2_min_cb_size;
/* Skip mode: only a merge index is coded; derive the MV by merge. */
1653 if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1654 if (s->sh.max_num_merge_cand > 1)
1655 merge_idx = ff_hevc_merge_idx_decode(s);
1659 ff_hevc_luma_mv_merge_mode(s, x0, y0,
1662 log2_cb_size, partIdx,
1663 merge_idx, ¤t_mv);
1664 x_pu = x0 >> s->sps->log2_min_pu_size;
1665 y_pu = y0 >> s->sps->log2_min_pu_size;
/* Propagate the derived MV to every min-PU covered by this PU. */
1667 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1668 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1669 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1670 } else { /* MODE_INTER */
1671 lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1672 if (lc->pu.merge_flag) {
1673 if (s->sh.max_num_merge_cand > 1)
1674 merge_idx = ff_hevc_merge_idx_decode(s);
1678 ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1679 partIdx, merge_idx, ¤t_mv);
1680 x_pu = x0 >> s->sps->log2_min_pu_size;
1681 y_pu = y0 >> s->sps->log2_min_pu_size;
1683 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1684 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1685 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
/* Explicit mode: decode inter_pred_idc, then per-list ref_idx, MVD and
 * MVP flag, and add the MVD onto the predictor. */
1687 enum InterPredIdc inter_pred_idc = PRED_L0;
1688 ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1689 current_mv.pred_flag = 0;
1690 if (s->sh.slice_type == B_SLICE)
1691 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1693 if (inter_pred_idc != PRED_L1) {
1694 if (s->sh.nb_refs[L0]) {
1695 ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1696 current_mv.ref_idx[0] = ref_idx[0];
1698 current_mv.pred_flag = PF_L0;
1699 ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1700 mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
1701 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1702 partIdx, merge_idx, ¤t_mv,
1704 current_mv.mv[0].x += lc->pu.mvd.x;
1705 current_mv.mv[0].y += lc->pu.mvd.y;
1708 if (inter_pred_idc != PRED_L0) {
1709 if (s->sh.nb_refs[L1]) {
1710 ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1711 current_mv.ref_idx[1] = ref_idx[1];
/* mvd_l1_zero_flag: L1 MVD is forced to zero for bi-prediction. */
1714 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1715 AV_ZERO32(&lc->pu.mvd);
1717 ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1720 current_mv.pred_flag += PF_L1;
1721 mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
1722 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1723 partIdx, merge_idx, ¤t_mv,
1725 current_mv.mv[1].x += lc->pu.mvd.x;
1726 current_mv.mv[1].y += lc->pu.mvd.y;
1729 x_pu = x0 >> s->sps->log2_min_pu_size;
1730 y_pu = y0 >> s->sps->log2_min_pu_size;
1732 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1733 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1734 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
/* Resolve the reference frames and, under frame threading, wait until
 * they have decoded far enough for this block's MC. */
1738 if (current_mv.pred_flag & PF_L0) {
1739 ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1742 hevc_await_progress(s, ref0, ¤t_mv.mv[0], y0, nPbH);
1744 if (current_mv.pred_flag & PF_L1) {
1745 ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1748 hevc_await_progress(s, ref1, ¤t_mv.mv[1], y0, nPbH);
/* Motion compensation proper: L0-only, L1-only, or bi-directional. */
1751 if (current_mv.pred_flag == PF_L0) {
1752 int x0_c = x0 >> s->sps->hshift[1];
1753 int y0_c = y0 >> s->sps->vshift[1];
1754 int nPbW_c = nPbW >> s->sps->hshift[1];
1755 int nPbH_c = nPbH >> s->sps->vshift[1];
1757 luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1758 ¤t_mv.mv[0], x0, y0, nPbW, nPbH,
1759 s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1760 s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1762 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1763 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1764 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1765 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1766 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1767 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1768 } else if (current_mv.pred_flag == PF_L1) {
1769 int x0_c = x0 >> s->sps->hshift[1];
1770 int y0_c = y0 >> s->sps->vshift[1];
1771 int nPbW_c = nPbW >> s->sps->hshift[1];
1772 int nPbH_c = nPbH >> s->sps->vshift[1];
1774 luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1775 ¤t_mv.mv[1], x0, y0, nPbW, nPbH,
1776 s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1777 s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1779 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1780 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1781 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1783 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1784 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1785 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1786 } else if (current_mv.pred_flag == PF_BI) {
1787 int x0_c = x0 >> s->sps->hshift[1];
1788 int y0_c = y0 >> s->sps->vshift[1];
1789 int nPbW_c = nPbW >> s->sps->hshift[1];
1790 int nPbH_c = nPbH >> s->sps->vshift[1];
1792 luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1793 ¤t_mv.mv[0], x0, y0, nPbW, nPbH,
1794 ref1->frame, ¤t_mv.mv[1], ¤t_mv);
1796 chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1797 x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 0);
1799 chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1800 x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 1);
/*
 * luma_intra_pred_mode(): derive the luma intra prediction mode of the PU
 * at (x0, y0) from the three most-probable-mode candidates (HEVC spec
 * 8.4.2), then record the mode in tab_ipm and mark the covered min-PUs as
 * intra in tab_mvf. Returns the derived mode.
 *
 * NOTE(review): degraded extraction — the `uint8_t candidate[3];`,
 * `int i, j;` declarations, the body of the CTB-boundary guard
 * (`cand_up = INTRA_DC;`), the `intra_pred_mode++;` inside the remainder
 * loop and several `} else {` lines sit on dropped lines.
 */
1807 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1808 int prev_intra_luma_pred_flag)
1810 HEVCLocalContext *lc = s->HEVClc;
1811 int x_pu = x0 >> s->sps->log2_min_pu_size;
1812 int y_pu = y0 >> s->sps->log2_min_pu_size;
1813 int min_pu_width = s->sps->min_pu_width;
1814 int size_in_pus = pu_size >> s->sps->log2_min_pu_size;
1815 int x0b = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1816 int y0b = y0 & ((1 << s->sps->log2_ctb_size) - 1);
/* Neighbouring modes; DC substitutes when the neighbour is unavailable. */
1818 int cand_up = (lc->ctb_up_flag || y0b) ?
1819 s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1820 int cand_left = (lc->ctb_left_flag || x0b) ?
1821 s->tab_ipm[y_pu * min_pu_width + x_pu - 1] : INTRA_DC;
1823 int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1825 MvField *tab_mvf = s->ref->tab_mvf;
1826 int intra_pred_mode;
1830 // intra_pred_mode prediction does not cross vertical CTB boundaries
1831 if ((y0 - 1) < y_ctb)
/* Build the three MPM candidates per the spec's derivation rules. */
1834 if (cand_left == cand_up) {
1835 if (cand_left < 2) {
1836 candidate[0] = INTRA_PLANAR;
1837 candidate[1] = INTRA_DC;
1838 candidate[2] = INTRA_ANGULAR_26;
/* equal angular neighbours: the mode plus its two angular neighbours */
1840 candidate[0] = cand_left;
1841 candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1842 candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1845 candidate[0] = cand_left;
1846 candidate[1] = cand_up;
1847 if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1848 candidate[2] = INTRA_PLANAR;
1849 } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1850 candidate[2] = INTRA_DC;
1852 candidate[2] = INTRA_ANGULAR_26;
/* Either pick an MPM directly, or decode the remainder mode, which is
 * interpreted relative to the sorted candidate list. */
1856 if (prev_intra_luma_pred_flag) {
1857 intra_pred_mode = candidate[lc->pu.mpm_idx];
1859 if (candidate[0] > candidate[1])
1860 FFSWAP(uint8_t, candidate[0], candidate[1]);
1861 if (candidate[0] > candidate[2])
1862 FFSWAP(uint8_t, candidate[0], candidate[2]);
1863 if (candidate[1] > candidate[2])
1864 FFSWAP(uint8_t, candidate[1], candidate[2]);
1866 intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1867 for (i = 0; i < 3; i++)
1868 if (intra_pred_mode >= candidate[i])
1872 /* write the intra prediction units into the mv array */
1875 for (i = 0; i < size_in_pus; i++) {
1876 memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1877 intra_pred_mode, size_in_pus);
1879 for (j = 0; j < size_in_pus; j++) {
1880 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
1884 return intra_pred_mode;
/*
 * Record the coding-tree depth of a CB into s->tab_ct_depth, one byte
 * per minimum-CB column over a length x length square.
 * NOTE(review): the extract drops lines here (the 'int y;' declaration
 * and the tail of the memset call are not visible).
 */
1887 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1888 int log2_cb_size, int ct_depth)
1890 int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1891 int x_cb = x0 >> s->sps->log2_min_cb_size;
1892 int y_cb = y0 >> s->sps->log2_min_cb_size;
1895 for (y = 0; y < length; y++)
1896 memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
/* Chroma mode remap table, used below for 4:2:2 content (indexed by the
 * derived luma/chroma mode in intra_prediction_unit). */
1900 static const uint8_t tab_mode_idx[] = {
1901 0, 1, 2, 2, 2, 2, 3, 5, 7, 8, 10, 12, 13, 15, 17, 18, 19, 20,
1902 21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31};
/*
 * Parse the intra prediction info for a CU: per-PU luma MPM flags and
 * modes (1 PU, or 4 when part_mode == PART_NxN), then the chroma modes,
 * with separate handling for 4:4:4 (chroma_format_idc == 3, per-PU),
 * 4:2:2 (== 2, remapped through tab_mode_idx) and 4:2:0/other non-mono.
 * NOTE(review): the extract omits interleaved lines (the opening brace,
 * 'int chroma_mode;', several 'else' lines); only visible statements
 * are annotated.
 */
1904 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1907 HEVCLocalContext *lc = s->HEVClc;
1908 static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1909 uint8_t prev_intra_luma_pred_flag[4];
1910 int split = lc->cu.part_mode == PART_NxN;
1911 int pb_size = (1 << log2_cb_size) >> split;
1912 int side = split + 1;
/* first pass: read all prev_intra_luma_pred_flag bits */
1916 for (i = 0; i < side; i++)
1917 for (j = 0; j < side; j++)
1918 prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
/* second pass: decode mpm_idx or rem mode, then derive the luma mode */
1920 for (i = 0; i < side; i++) {
1921 for (j = 0; j < side; j++) {
1922 if (prev_intra_luma_pred_flag[2 * i + j])
1923 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1925 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1927 lc->pu.intra_pred_mode[2 * i + j] =
1928 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1929 prev_intra_luma_pred_flag[2 * i + j]);
1933 if (s->sps->chroma_format_idc == 3) {
/* 4:4:4: one chroma mode per PU; mode 4 means "same as luma" */
1934 for (i = 0; i < side; i++) {
1935 for (j = 0; j < side; j++) {
1936 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1937 if (chroma_mode != 4) {
1938 if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
1939 lc->pu.intra_pred_mode_c[2 * i + j] = 34;
1941 lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
1943 lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
1947 } else if (s->sps->chroma_format_idc == 2) {
/* 4:2:2: single chroma mode, remapped through tab_mode_idx */
1949 lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1950 if (chroma_mode != 4) {
1951 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1954 mode_idx = intra_chroma_table[chroma_mode];
1956 mode_idx = lc->pu.intra_pred_mode[0];
1958 lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
1959 } else if (s->sps->chroma_format_idc != 0) {
/* 4:2:0 (non-monochrome): single chroma mode, no remap */
1960 chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1961 if (chroma_mode != 4) {
1962 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1963 lc->pu.intra_pred_mode_c[0] = 34;
1965 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
1967 lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
/*
 * Fill default intra state for a CB that carries no parsed intra info
 * (skip/PCM/inter paths): tab_ipm is set to INTRA_DC over the block,
 * and for intra CUs every covered PU in the mv array is marked
 * PF_INTRA.  Blocks smaller than one PU (size_in_pus == 0) are skipped.
 * NOTE(review): intermediate lines (brace, 'int j, k;', 'return;') are
 * missing from this extract.
 */
1972 static void intra_prediction_unit_default_value(HEVCContext *s,
1976 HEVCLocalContext *lc = s->HEVClc;
1977 int pb_size = 1 << log2_cb_size;
1978 int size_in_pus = pb_size >> s->sps->log2_min_pu_size;
1979 int min_pu_width = s->sps->min_pu_width;
1980 MvField *tab_mvf = s->ref->tab_mvf;
1981 int x_pu = x0 >> s->sps->log2_min_pu_size;
1982 int y_pu = y0 >> s->sps->log2_min_pu_size;
1985 if (size_in_pus == 0)
1987 for (j = 0; j < size_in_pus; j++)
1988 memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
1989 if (lc->cu.pred_mode == MODE_INTRA)
1990 for (j = 0; j < size_in_pus; j++)
1991 for (k = 0; k < size_in_pus; k++)
1992 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
/*
 * Decode one coding unit: CU-level flags (transquant bypass, skip),
 * prediction mode / partition mode, then either the skip path, the
 * PCM path, or per-partition prediction units followed by the residual
 * transform tree.  Finally propagates qp_y into qp_y_tab, updates the
 * QP predictor at QP-group boundaries and records the CT depth.
 * HLS = High Level Syntax; this mirrors coding_unit() in the spec.
 * NOTE(review): many interleaved lines (braces, 'else', 'case' labels,
 * error-return checks) are missing from this extract; annotations
 * cover only the visible statements.
 */
1995 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
1997 int cb_size = 1 << log2_cb_size;
1998 HEVCLocalContext *lc = s->HEVClc;
1999 int log2_min_cb_size = s->sps->log2_min_cb_size;
2000 int length = cb_size >> log2_min_cb_size;
2001 int min_cb_width = s->sps->min_cb_width;
2002 int x_cb = x0 >> log2_min_cb_size;
2003 int y_cb = y0 >> log2_min_cb_size;
2004 int idx = log2_cb_size - 2;
2005 int qp_block_mask = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
/* reset per-CU state before parsing */
2010 lc->cu.rqt_root_cbf = 1;
2011 lc->cu.pred_mode = MODE_INTRA;
2012 lc->cu.part_mode = PART_2Nx2N;
2013 lc->cu.intra_split_flag = 0;
2014 lc->cu.pcm_flag = 0;
2016 SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2017 for (x = 0; x < 4; x++)
2018 lc->pu.intra_pred_mode[x] = 1;
2019 if (s->pps->transquant_bypass_enable_flag) {
2020 lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2021 if (lc->cu.cu_transquant_bypass_flag)
2022 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2024 lc->cu.cu_transquant_bypass_flag = 0;
2026 if (s->sh.slice_type != I_SLICE) {
2027 uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
/* replicate the skip flag over the whole CB area */
2029 x = y_cb * min_cb_width + x_cb;
2030 for (y = 0; y < length; y++) {
2031 memset(&s->skip_flag[x], skip_flag, length);
2034 lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2037 if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
/* skip path: one 2Nx2N PU, default intra state, no residual */
2038 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2039 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2041 if (!s->sh.disable_deblocking_filter_flag)
2042 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2044 if (s->sh.slice_type != I_SLICE)
2045 lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2046 if (lc->cu.pred_mode != MODE_INTRA ||
2047 log2_cb_size == s->sps->log2_min_cb_size) {
2048 lc->cu.part_mode = ff_hevc_part_mode_decode(s, log2_cb_size);
2049 lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2050 lc->cu.pred_mode == MODE_INTRA;
2053 if (lc->cu.pred_mode == MODE_INTRA) {
/* PCM is only signalled for 2Nx2N intra CBs within the SPS PCM size range */
2054 if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2055 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2056 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2057 lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s);
2059 if (lc->cu.pcm_flag) {
2060 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2061 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2062 if (s->sps->pcm.loop_filter_disable_flag)
2063 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2068 intra_prediction_unit(s, x0, y0, log2_cb_size);
2071 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
/* inter: one hls_prediction_unit() call per partition of part_mode */
2072 switch (lc->cu.part_mode) {
2074 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2077 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 2, log2_cb_size, 0, idx);
2078 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2081 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2082 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2085 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 4, log2_cb_size, 0, idx);
2086 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2089 hls_prediction_unit(s, x0, y0, cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2090 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size / 4, log2_cb_size, 1, idx);
2093 hls_prediction_unit(s, x0, y0, cb_size / 4, cb_size, log2_cb_size, 0, idx - 2);
2094 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2097 hls_prediction_unit(s, x0, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2098 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size / 4, cb_size, log2_cb_size, 1, idx - 2);
2101 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2102 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2103 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2104 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
2109 if (!lc->cu.pcm_flag) {
2110 if (lc->cu.pred_mode != MODE_INTRA &&
2111 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2112 lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2114 if (lc->cu.rqt_root_cbf) {
2115 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2116 s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2117 s->sps->max_transform_hierarchy_depth_inter;
2118 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2120 log2_cb_size, 0, 0);
2124 if (!s->sh.disable_deblocking_filter_flag)
2125 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2130 if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2131 ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
/* replicate qp_y over the CB area for later deblocking/prediction */
2133 x = y_cb * min_cb_width + x_cb;
2134 for (y = 0; y < length; y++) {
2135 memset(&s->qp_y_tab[x], lc->qp_y, length);
/* update the QP predictor when the CB completes a QP group */
2139 if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2140 ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2141 lc->qPy_pred = lc->qp_y;
2144 set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
/*
 * Recursively decode the coding quadtree: read (or infer, at the
 * picture border) split_cu_flag, reset per-QP-group state, and either
 * recurse into the four sub-CBs that lie inside the picture or decode
 * a leaf CU via hls_coding_unit().  At CTB-completing positions it
 * reads end_of_slice_flag; the return value is "more data follows"
 * (0/1) or a negative error.
 * NOTE(review): interleaved lines (braces, 'else', error checks after
 * each recursion, 'int ret/split_cu_flag/more_data' declarations) are
 * missing from this extract.
 */
2149 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2150 int log2_cb_size, int cb_depth)
2152 HEVCLocalContext *lc = s->HEVClc;
2153 const int cb_size = 1 << log2_cb_size;
2155 int qp_block_mask = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
2158 lc->ct.depth = cb_depth;
/* split_cu_flag is coded only for CBs fully inside the picture that
 * are larger than the minimum CB; otherwise it is inferred */
2159 if (x0 + cb_size <= s->sps->width &&
2160 y0 + cb_size <= s->sps->height &&
2161 log2_cb_size > s->sps->log2_min_cb_size) {
2162 split_cu_flag = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2164 split_cu_flag = (log2_cb_size > s->sps->log2_min_cb_size);
2166 if (s->pps->cu_qp_delta_enabled_flag &&
2167 log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2168 lc->tu.is_cu_qp_delta_coded = 0;
2169 lc->tu.cu_qp_delta = 0;
2172 if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2173 log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_chroma_qp_offset_depth) {
2174 lc->tu.is_cu_chroma_qp_offset_coded = 0;
2177 if (split_cu_flag) {
2178 const int cb_size_split = cb_size >> 1;
2179 const int x1 = x0 + cb_size_split;
2180 const int y1 = y0 + cb_size_split;
/* recurse only into quadrants that intersect the picture */
2184 more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2188 if (more_data && x1 < s->sps->width) {
2189 more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2193 if (more_data && y1 < s->sps->height) {
2194 more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2198 if (more_data && x1 < s->sps->width &&
2199 y1 < s->sps->height) {
2200 more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
2205 if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2206 ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2207 lc->qPy_pred = lc->qp_y;
2210 return ((x1 + cb_size_split) < s->sps->width ||
2211 (y1 + cb_size_split) < s->sps->height);
2215 ret = hls_coding_unit(s, x0, y0, log2_cb_size);
/* end_of_slice_flag is read when this CB completes a CTB (or hits the
 * right/bottom picture border) */
2218 if ((!((x0 + cb_size) %
2219 (1 << (s->sps->log2_ctb_size))) ||
2220 (x0 + cb_size >= s->sps->width)) &&
2222 (1 << (s->sps->log2_ctb_size))) ||
2223 (y0 + cb_size >= s->sps->height))) {
2224 int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2225 return !end_of_slice_flag;
/*
 * Set up neighbour-availability state for the CTB about to be decoded:
 * records the slice address for this CTB, computes the tile-row end
 * coordinates (end_of_tiles_x/y), and derives the slice/tile boundary
 * flags plus the ctb_left/up/up_right/up_left availability flags used
 * by the CABAC context derivation and intra prediction.
 * NOTE(review): a few lines are missing from this extract (the opening
 * brace and some 'else' lines).
 */
2234 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2237 HEVCLocalContext *lc = s->HEVClc;
2238 int ctb_size = 1 << s->sps->log2_ctb_size;
2239 int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2240 int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2242 int tile_left_boundary, tile_up_boundary;
2243 int slice_left_boundary, slice_up_boundary;
2245 s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2247 if (s->pps->entropy_coding_sync_enabled_flag) {
/* WPP: each CTB row starts a new QP group */
2248 if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2249 lc->first_qp_group = 1;
2250 lc->end_of_tiles_x = s->sps->width;
2251 } else if (s->pps->tiles_enabled_flag) {
/* entering a new tile: recompute the tile's right edge */
2252 if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2253 int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2254 lc->end_of_tiles_x = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2255 lc->first_qp_group = 1;
2258 lc->end_of_tiles_x = s->sps->width;
2261 lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2263 if (s->pps->tiles_enabled_flag) {
2264 tile_left_boundary = x_ctb > 0 &&
2265 s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1]];
2266 slice_left_boundary = x_ctb > 0 &&
2267 s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1];
2268 tile_up_boundary = y_ctb > 0 &&
2269 s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
2270 slice_up_boundary = y_ctb > 0 &&
2271 s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width];
2273 tile_left_boundary =
2274 tile_up_boundary = 0;
2275 slice_left_boundary = ctb_addr_in_slice <= 0;
2276 slice_up_boundary = ctb_addr_in_slice < s->sps->ctb_width;
/* bit 0 = slice boundary, bit 1 = tile boundary */
2278 lc->slice_or_tiles_left_boundary = slice_left_boundary + (tile_left_boundary << 1);
2279 lc->slice_or_tiles_up_boundary = slice_up_boundary + (tile_up_boundary << 1);
2280 lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !tile_left_boundary);
2281 lc->ctb_up_flag = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && !tile_up_boundary);
2282 lc->ctb_up_right_flag = ((y_ctb > 0) && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2283 lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0) && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
/*
 * Single-threaded slice decoding entry point (run via avctx->execute):
 * validates dependent-slice preconditions, then walks CTBs in tile-scan
 * order, initializing neighbours and CABAC, decoding SAO parameters and
 * the coding quadtree, saving CABAC states and running the in-loop
 * filters per CTB.  Filters the final CTB when the picture is complete.
 * NOTE(review): several lines (brace, declarations of x_ctb/y_ctb/
 * more_data, error-path bodies, final return) are missing from this
 * extract.
 */
2286 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2288 HEVCContext *s = avctxt->priv_data;
2289 int ctb_size = 1 << s->sps->log2_ctb_size;
2293 int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2295 if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2296 av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2297 return AVERROR_INVALIDDATA;
2300 if (s->sh.dependent_slice_segment_flag) {
/* a dependent slice segment needs its predecessor already decoded */
2301 int prev_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2302 if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2303 av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2304 return AVERROR_INVALIDDATA;
2308 while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2309 int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2311 x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2312 y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2313 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2315 ff_hevc_cabac_init(s, ctb_addr_ts);
2317 hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2319 s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2320 s->deblock[ctb_addr_rs].tc_offset = s->sh.tc_offset;
2321 s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag;
2323 more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2324 if (more_data < 0) {
/* mark the CTB as unusable for later neighbour checks */
2325 s->tab_slice_address[ctb_addr_rs] = -1;
2331 ff_hevc_save_states(s, ctb_addr_ts);
2332 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2335 if (x_ctb + ctb_size >= s->sps->width &&
2336 y_ctb + ctb_size >= s->sps->height)
2337 ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
/*
 * Run single-threaded slice decoding by dispatching hls_decode_entry
 * through the avctx->execute mechanism (one job).
 * NOTE(review): the body is mostly missing from this extract (arg/ret
 * setup and the return statement are not visible).
 */
2342 static int hls_slice_data(HEVCContext *s)
2350 s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
/*
 * Wavefront (WPP) per-row decoding entry point (run via execute2):
 * each job decodes one CTB row using its own HEVCContext clone from
 * s1->sList[].  Rows after the first re-init the bitstream/CABAC from
 * the slice-header entry point offsets.  Synchronization with the row
 * above uses ff_thread_await_progress2/report_progress2; a shared
 * atomic wpp_err flag aborts all rows on error.
 * NOTE(review): interleaved lines (brace, 'int ret/more_data', the
 * ctb_row==0 branch, returns) are missing from this extract.
 */
2353 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2355 HEVCContext *s1 = avctxt->priv_data, *s;
2356 HEVCLocalContext *lc;
2357 int ctb_size = 1<< s1->sps->log2_ctb_size;
2359 int *ctb_row_p = input_ctb_row;
2360 int ctb_row = ctb_row_p[job];
2361 int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->sps->width + ctb_size - 1) >> s1->sps->log2_ctb_size);
2362 int ctb_addr_ts = s1->pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2363 int thread = ctb_row % s1->threads_number;
2366 s = s1->sList[self_id];
/* rows > 0: point the bit reader and CABAC at this row's entry point */
2370 ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2374 ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2377 while(more_data && ctb_addr_ts < s->sps->ctb_size) {
2378 int x_ctb = (ctb_addr_rs % s->sps->ctb_width) << s->sps->log2_ctb_size;
2379 int y_ctb = (ctb_addr_rs / s->sps->ctb_width) << s->sps->log2_ctb_size;
2381 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
/* wait until the row above is far enough ahead (2-CTB lag) */
2383 ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
2385 if (avpriv_atomic_int_get(&s1->wpp_err)){
2386 ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2390 ff_hevc_cabac_init(s, ctb_addr_ts);
2391 hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2392 more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2394 if (more_data < 0) {
2395 s->tab_slice_address[ctb_addr_rs] = -1;
2401 ff_hevc_save_states(s, ctb_addr_ts);
2402 ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2403 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
/* slice ended mid-row unexpectedly: flag the error for all rows */
2405 if (!more_data && (x_ctb+ctb_size) < s->sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2406 avpriv_atomic_int_set(&s1->wpp_err, 1);
2407 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2411 if ((x_ctb+ctb_size) >= s->sps->width && (y_ctb+ctb_size) >= s->sps->height ) {
2412 ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2413 ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2416 ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2419 if(x_ctb >= s->sps->width) {
2423 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
/*
 * Wavefront slice decoding driver: allocates per-thread HEVCContext
 * clones on first use, converts the slice-header entry_point_offset[]
 * values (which count emulation-prevention bytes) into real byte
 * offsets/sizes using skipped_bytes_pos[], then launches one
 * hls_decode_entry_wpp job per CTB row via execute2.
 * NOTE(review): interleaved lines (brace, declarations of i/j/offset,
 * the arg/ret fill loop, result accumulation, frees, return) are
 * missing from this extract.
 */
2428 static int hls_slice_data_wpp(HEVCContext *s, const uint8_t *nal, int length)
2430 HEVCLocalContext *lc = s->HEVClc;
2431 int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2432 int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2434 int startheader, cmpt = 0;
2439 ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
/* lazily create one decoder clone + local context per extra thread */
2442 for (i = 1; i < s->threads_number; i++) {
2443 s->sList[i] = av_malloc(sizeof(HEVCContext));
2444 memcpy(s->sList[i], s, sizeof(HEVCContext));
2445 s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2446 s->sList[i]->HEVClc = s->HEVClcList[i];
2450 offset = (lc->gb.index >> 3);
/* count emulation-prevention bytes skipped before the first entry point */
2452 for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < s->skipped_bytes; j++) {
2453 if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2459 for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2460 offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2461 for (j = 0, cmpt = 0, startheader = offset
2462 + s->sh.entry_point_offset[i]; j < s->skipped_bytes; j++) {
2463 if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2468 s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2469 s->sh.offset[i - 1] = offset;
2472 if (s->sh.num_entry_point_offsets != 0) {
/* last segment runs to the end of the NAL payload */
2473 offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2474 s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2475 s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
/* refresh the clones with the current context, keeping their own lc */
2480 for (i = 1; i < s->threads_number; i++) {
2481 s->sList[i]->HEVClc->first_qp_group = 1;
2482 s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2483 memcpy(s->sList[i], s, sizeof(HEVCContext));
2484 s->sList[i]->HEVClc = s->HEVClcList[i];
2487 avpriv_atomic_int_set(&s->wpp_err, 0);
2488 ff_reset_entries(s->avctx);
2490 for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2495 if (s->pps->entropy_coding_sync_enabled_flag)
2496 s->avctx->execute2(s->avctx, (void *) hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2498 for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
/*
 * Parse the 2-byte HEVC NAL unit header: forbidden_zero_bit,
 * nal_unit_type (6 bits), nuh_layer_id (6 bits) and
 * nuh_temporal_id_plus1 (3 bits, stored minus one).
 */
2506 * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2507 * 0 if the unit should be skipped, 1 otherwise
2509 static int hls_nal_unit(HEVCContext *s)
2511 GetBitContext *gb = &s->HEVClc->gb;
/* forbidden_zero_bit must be 0 */
2514 if (get_bits1(gb) != 0)
2515 return AVERROR_INVALIDDATA;
2517 s->nal_unit_type = get_bits(gb, 6);
2519 nuh_layer_id = get_bits(gb, 6);
2520 s->temporal_id = get_bits(gb, 3) - 1;
2521 if (s->temporal_id < 0)
2522 return AVERROR_INVALIDDATA;
2524 av_log(s->avctx, AV_LOG_DEBUG,
2525 "nal_unit_type: %d, nuh_layer_id: %dtemporal_id: %d\n",
2526 s->nal_unit_type, nuh_layer_id, s->temporal_id);
/* only the base layer (nuh_layer_id == 0) is decoded */
2528 return nuh_layer_id == 0;
/*
 * Attach SEI-derived side data to the output frame: stereo 3D info
 * from the frame-packing SEI (types 3..5 only) and a display matrix
 * from the display-orientation SEI (rotation + h/v flips).
 * NOTE(review): some lines (brace, NULL checks on 'stereo'/'rotation'
 * allocations, 'case' labels, return 0) are missing from this extract.
 */
2531 static int set_side_data(HEVCContext *s)
2533 AVFrame *out = s->ref->frame;
2535 if (s->sei_frame_packing_present &&
2536 s->frame_packing_arrangement_type >= 3 &&
2537 s->frame_packing_arrangement_type <= 5 &&
2538 s->content_interpretation_type > 0 &&
2539 s->content_interpretation_type < 3) {
2540 AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2542 return AVERROR(ENOMEM);
2544 switch (s->frame_packing_arrangement_type) {
2546 if (s->quincunx_subsampling)
2547 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2549 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2552 stereo->type = AV_STEREO3D_TOPBOTTOM;
2555 stereo->type = AV_STEREO3D_FRAMESEQUENCE;
/* content_interpretation_type 2 means right view first */
2559 if (s->content_interpretation_type == 2)
2560 stereo->flags = AV_STEREO3D_FLAG_INVERT;
2563 if (s->sei_display_orientation_present &&
2564 (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
/* SEI rotation is in 1/65536-turn units; convert to degrees */
2565 double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2566 AVFrameSideData *rotation = av_frame_new_side_data(out,
2567 AV_FRAME_DATA_DISPLAYMATRIX,
2568 sizeof(int32_t) * 9);
2570 return AVERROR(ENOMEM);
2572 av_display_rotation_set((int32_t *)rotation->data, angle);
2573 av_display_matrix_flip((int32_t *)rotation->data,
2574 s->sei_vflip, s->sei_hflip);
/*
 * Per-frame initialization at the first slice of a picture: clear the
 * deblocking/CBF/PCM/slice-address tables, allocate the new reference
 * frame, build the frame RPS, attach side data and try to output a
 * frame.  On the error path the in-flight frame's progress is reported
 * as finished so frame-threaded consumers do not dead-wait.
 * NOTE(review): brace, error-goto lines and the 'return 0 / fail:'
 * structure are missing from this extract.
 */
2580 static int hevc_frame_start(HEVCContext *s)
2582 HEVCLocalContext *lc = s->HEVClc;
2583 int pic_size_in_ctb = ((s->sps->width >> s->sps->log2_min_cb_size) + 1) *
2584 ((s->sps->height >> s->sps->log2_min_cb_size) + 1);
2587 memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2588 memset(s->vertical_bs, 0, s->bs_width * s->bs_height);
2589 memset(s->cbf_luma, 0, s->sps->min_tb_width * s->sps->min_tb_height);
2590 memset(s->is_pcm, 0, (s->sps->min_pu_width + 1) * (s->sps->min_pu_height + 1));
2591 memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2594 s->first_nal_type = s->nal_unit_type;
2596 if (s->pps->tiles_enabled_flag)
2597 lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2599 ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2603 ret = ff_hevc_frame_rps(s);
2605 av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2609 s->ref->frame->key_frame = IS_IRAP(s);
2611 ret = set_side_data(s);
/* slice_type: 2=I,1=P,0=B -> AV_PICTURE_TYPE_{I,P,B} = 1,2,3 */
2615 s->frame->pict_type = 3 - s->sh.slice_type;
2617 av_frame_unref(s->output_frame);
2618 ret = ff_hevc_output_frame(s, s->output_frame, 0);
2622 ff_thread_finish_setup(s->avctx);
2627 if (s->ref && s->threads_type == FF_THREAD_FRAME)
2628 ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
/*
 * Decode a single NAL unit: parse its header, then dispatch on
 * nal_unit_type — parameter sets (VPS/SPS/PPS), SEI, or VCL slices.
 * For slices: parse the slice header, enforce RASL/CRA recovery-point
 * rules, start the frame on the first slice, check NAL-type
 * consistency across the picture's slices, build reference lists and
 * run slice data (WPP path when multiple threads and entry points).
 * NOTE(review): many lines ('{', 'case' labels, 'break;', goto fail
 * paths, return) are missing from this extract.
 */
2633 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2635 HEVCLocalContext *lc = s->HEVClc;
2636 GetBitContext *gb = &lc->gb;
2637 int ctb_addr_ts, ret;
2639 ret = init_get_bits8(gb, nal, length);
2643 ret = hls_nal_unit(s);
2645 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2651 switch (s->nal_unit_type) {
2653 ret = ff_hevc_decode_nal_vps(s);
2658 ret = ff_hevc_decode_nal_sps(s);
2663 ret = ff_hevc_decode_nal_pps(s);
2667 case NAL_SEI_PREFIX:
2668 case NAL_SEI_SUFFIX:
2669 ret = ff_hevc_decode_nal_sei(s);
2680 case NAL_BLA_W_RADL:
2682 case NAL_IDR_W_RADL:
2689 ret = hls_slice_header(s);
2693 if (s->max_ra == INT_MAX) {
2694 if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2698 s->max_ra = INT_MIN;
/* RASL pictures preceding the recovery point are skipped */
2702 if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2703 s->poc <= s->max_ra) {
2707 if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2708 s->max_ra = INT_MIN;
2711 if (s->sh.first_slice_in_pic_flag) {
2712 ret = hevc_frame_start(s);
2715 } else if (!s->ref) {
2716 av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
/* all VCL NALUs of a picture must share the same type */
2720 if (s->nal_unit_type != s->first_nal_type) {
2721 av_log(s->avctx, AV_LOG_ERROR,
2722 "Non-matching NAL types of the VCL NALUs: %d %d\n",
2723 s->first_nal_type, s->nal_unit_type);
2724 return AVERROR_INVALIDDATA;
2727 if (!s->sh.dependent_slice_segment_flag &&
2728 s->sh.slice_type != I_SLICE) {
2729 ret = ff_hevc_slice_rpl(s);
2731 av_log(s->avctx, AV_LOG_WARNING,
2732 "Error constructing the reference lists for the current slice.\n");
2737 if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2738 ctb_addr_ts = hls_slice_data_wpp(s, nal, length);
2740 ctb_addr_ts = hls_slice_data(s);
/* picture complete when every CTB has been decoded */
2741 if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2745 if (ctb_addr_ts < 0) {
2752 s->seq_decode = (s->seq_decode + 1) & 0xff;
2753 s->max_ra = INT_MAX;
2759 av_log(s->avctx, AV_LOG_INFO,
2760 "Skipping NAL unit %d\n", s->nal_unit_type);
2765 if (s->avctx->err_recognition & AV_EF_EXPLODE)
/*
 * Extract the RBSP from a NAL unit: strip the 00 00 03 emulation-
 * prevention bytes into nal->rbsp_buffer, recording the position of
 * each removed byte in s->skipped_bytes_pos (needed later by the WPP
 * entry-point offset fixup).  Uses word-at-a-time zero scanning on
 * fast-unaligned targets to find the first candidate escape quickly.
 * NOTE(review): interleaved lines (brace, variable declarations,
 * several macro bodies, the fast-path return, final bookkeeping) are
 * missing from this extract.
 */
2770 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2771 * between these functions would be nice. */
2772 int ff_hevc_extract_rbsp(HEVCContext *s, const uint8_t *src, int length,
2778 s->skipped_bytes = 0;
2779 #define STARTCODE_TEST \
2780 if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) { \
2781 if (src[i + 2] != 3) { \
2782 /* startcode, so we must be past the end */ \
2787 #if HAVE_FAST_UNALIGNED
2788 #define FIND_FIRST_ZERO \
2789 if (i > 0 && !src[i]) \
/* 64-bit SWAR scan: test 8 bytes at a time for a zero byte */
2794 for (i = 0; i + 1 < length; i += 9) {
2795 if (!((~AV_RN64A(src + i) &
2796 (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2797 0x8000800080008080ULL))
/* 32-bit SWAR variant of the same zero-byte scan */
2804 for (i = 0; i + 1 < length; i += 5) {
2805 if (!((~AV_RN32A(src + i) &
2806 (AV_RN32A(src + i) - 0x01000101U)) &
2813 #endif /* HAVE_FAST_64BIT */
2815 for (i = 0; i + 1 < length; i += 2) {
2818 if (i > 0 && src[i - 1] == 0)
2822 #endif /* HAVE_FAST_UNALIGNED */
2824 if (i >= length - 1) { // no escaped 0
2830 av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2831 length + FF_INPUT_BUFFER_PADDING_SIZE);
2832 if (!nal->rbsp_buffer)
2833 return AVERROR(ENOMEM);
2835 dst = nal->rbsp_buffer;
/* copy the escape-free prefix verbatim */
2837 memcpy(dst, src, i);
2839 while (si + 2 < length) {
2840 // remove escapes (very rare 1:2^22)
2841 if (src[si + 2] > 3) {
2842 dst[di++] = src[si++];
2843 dst[di++] = src[si++];
2844 } else if (src[si] == 0 && src[si + 1] == 0) {
2845 if (src[si + 2] == 3) { // escape
/* grow the skipped-bytes position array on demand */
2851 if (s->skipped_bytes_pos_size < s->skipped_bytes) {
2852 s->skipped_bytes_pos_size *= 2;
2853 av_reallocp_array(&s->skipped_bytes_pos,
2854 s->skipped_bytes_pos_size,
2855 sizeof(*s->skipped_bytes_pos));
2856 if (!s->skipped_bytes_pos)
2857 return AVERROR(ENOMEM);
2859 if (s->skipped_bytes_pos)
2860 s->skipped_bytes_pos[s->skipped_bytes-1] = di - 1;
2862 } else // next start code
2866 dst[di++] = src[si++];
2869 dst[di++] = src[si++];
2872 memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
/*
 * Split an input packet into NAL units (length-prefixed "hvcc" mode
 * via s->nal_length_size, or Annex-B start-code mode), extract each
 * RBSP into s->nals[] while growing the per-NAL skipped-bytes arrays,
 * then decode the units in order.  EOB/EOS NALs mark end of stream.
 * NOTE(review): interleaved lines (brace, the Annex-B/hvcc mode
 * branch, loop bodies, buf/length advancement, 'fail:' label, return)
 * are missing from this extract.
 */
2879 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2881 int i, consumed, ret = 0;
2884 s->last_eos = s->eos;
2887 /* split the input packet into NAL units, so we know the upper bound on the
2888 * number of slices in the frame */
2890 while (length >= 4) {
2892 int extract_length = 0;
/* hvcc mode: big-endian length prefix of nal_length_size bytes */
2896 for (i = 0; i < s->nal_length_size; i++)
2897 extract_length = (extract_length << 8) | buf[i];
2898 buf += s->nal_length_size;
2899 length -= s->nal_length_size;
2901 if (extract_length > length) {
2902 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2903 ret = AVERROR_INVALIDDATA;
2907 /* search start code */
2908 while (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2912 av_log(s->avctx, AV_LOG_ERROR, "No start code is found.\n");
2913 ret = AVERROR_INVALIDDATA;
2923 extract_length = length;
/* grow the NAL array and its parallel skipped-bytes bookkeeping */
2925 if (s->nals_allocated < s->nb_nals + 1) {
2926 int new_size = s->nals_allocated + 1;
2927 HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2929 ret = AVERROR(ENOMEM);
2933 memset(s->nals + s->nals_allocated, 0,
2934 (new_size - s->nals_allocated) * sizeof(*tmp));
2935 av_reallocp_array(&s->skipped_bytes_nal, new_size, sizeof(*s->skipped_bytes_nal));
2936 av_reallocp_array(&s->skipped_bytes_pos_size_nal, new_size, sizeof(*s->skipped_bytes_pos_size_nal));
2937 av_reallocp_array(&s->skipped_bytes_pos_nal, new_size, sizeof(*s->skipped_bytes_pos_nal));
2938 s->skipped_bytes_pos_size_nal[s->nals_allocated] = 1024; // initial buffer size
2939 s->skipped_bytes_pos_nal[s->nals_allocated] = av_malloc_array(s->skipped_bytes_pos_size_nal[s->nals_allocated], sizeof(*s->skipped_bytes_pos));
2940 s->nals_allocated = new_size;
2942 s->skipped_bytes_pos_size = s->skipped_bytes_pos_size_nal[s->nb_nals];
2943 s->skipped_bytes_pos = s->skipped_bytes_pos_nal[s->nb_nals];
2944 nal = &s->nals[s->nb_nals];
2946 consumed = ff_hevc_extract_rbsp(s, buf, extract_length, nal);
/* persist the skipped-bytes state back into the per-NAL arrays */
2948 s->skipped_bytes_nal[s->nb_nals] = s->skipped_bytes;
2949 s->skipped_bytes_pos_size_nal[s->nb_nals] = s->skipped_bytes_pos_size;
2950 s->skipped_bytes_pos_nal[s->nb_nals++] = s->skipped_bytes_pos;
2958 ret = init_get_bits8(&s->HEVClc->gb, nal->data, nal->size);
2963 if (s->nal_unit_type == NAL_EOB_NUT ||
2964 s->nal_unit_type == NAL_EOS_NUT)
2971 /* parse the NAL units */
2972 for (i = 0; i < s->nb_nals; i++) {
2974 s->skipped_bytes = s->skipped_bytes_nal[i];
2975 s->skipped_bytes_pos = s->skipped_bytes_pos_nal[i];
2977 ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2979 av_log(s->avctx, AV_LOG_WARNING,
2980 "Error parsing NAL unit #%d.\n", i);
/* on the failure path: unblock frame-threaded waiters */
2986 if (s->ref && s->threads_type == FF_THREAD_FRAME)
2987 ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
/* Log a 16-byte MD5 digest as 32 lowercase hex characters (no newline). */
2992 static void print_md5(void *log_ctx, int level, uint8_t md5[16])
2995 for (i = 0; i < 16; i++)
2996 av_log(log_ctx, level, "%02"PRIx8, md5[i]);
/*
 * Verify the decoded frame against the per-plane MD5 checksums from
 * the picture-hash SEI (s->md5[]).  For >8-bit formats the samples are
 * byte-swapped into checksum_buf first, because the SEI checksums are
 * computed over little-endian sample data.  Returns 0 on match,
 * AVERROR_INVALIDDATA on the first mismatching plane.
 * NOTE(review): some lines (brace, 'int i/j' declarations, the md5[]
 * local, pixel-format NULL check, return 0) are missing from this
 * extract.
 */
2999 static int verify_md5(HEVCContext *s, AVFrame *frame)
3001 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
3006 return AVERROR(EINVAL);
3008 pixel_shift = desc->comp[0].depth_minus1 > 7;
3010 av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
3013 /* the checksums are LE, so we have to byteswap for >8bpp formats
3016 if (pixel_shift && !s->checksum_buf) {
3017 av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
3018 FFMAX3(frame->linesize[0], frame->linesize[1],
3019 frame->linesize[2]));
3020 if (!s->checksum_buf)
3021 return AVERROR(ENOMEM);
3025 for (i = 0; frame->data[i]; i++) {
3026 int width = s->avctx->coded_width;
3027 int height = s->avctx->coded_height;
/* chroma planes use the subsampled dimensions */
3028 int w = (i == 1 || i == 2) ? (width >> desc->log2_chroma_w) : width;
3029 int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
3032 av_md5_init(s->md5_ctx);
3033 for (j = 0; j < h; j++) {
3034 const uint8_t *src = frame->data[i] + j * frame->linesize[i];
3037 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
3038 (const uint16_t *) src, w);
3039 src = s->checksum_buf;
3042 av_md5_update(s->md5_ctx, src, w << pixel_shift);
3044 av_md5_final(s->md5_ctx, md5);
3046 if (!memcmp(md5, s->md5[i], 16)) {
3047 av_log (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
3048 print_md5(s->avctx, AV_LOG_DEBUG, md5);
3049 av_log (s->avctx, AV_LOG_DEBUG, "; ");
3051 av_log (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
3052 print_md5(s->avctx, AV_LOG_ERROR, md5);
3053 av_log (s->avctx, AV_LOG_ERROR, " != ");
3054 print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
3055 av_log (s->avctx, AV_LOG_ERROR, "\n");
3056 return AVERROR_INVALIDDATA;
3060 av_log(s->avctx, AV_LOG_DEBUG, "\n");
/* AVCodec.decode callback: decode one packet of NAL units and
 * optionally return one decoded frame through 'data'.
 * NOTE(review): locals, several braces and return-value checks are
 * elided from this excerpt — do not edit without the complete text. */
3065 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
3069 HEVCContext *s = avctx->priv_data;
/* empty packet signals end of stream: drain buffered output frames */
3072 ret = ff_hevc_output_frame(s, data, 1);
/* decode every NAL unit contained in the packet */
3081 ret = decode_nal_units(s, avpkt->data, avpkt->size);
3085 /* verify the SEI checksum */
3086 if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
3088 ret = verify_md5(s, s->ref->frame);
3089 if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
/* on AV_EF_EXPLODE, drop the mismatching reference frame and fail */
3090 ff_hevc_unref_frame(s, s->ref, ~0);
3096 if (s->is_decoded) {
3097 av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
/* hand a previously buffered output frame to the caller, if any */
3101 if (s->output_frame->buf[0]) {
3102 av_frame_move_ref(data, s->output_frame);
/* Make 'dst' reference the same picture as 'src': take a thread-aware
 * reference on the frame, reference the per-frame metadata buffers
 * (motion vectors, reference picture lists), then copy the scalar
 * fields. On allocation failure all partial references are dropped and
 * AVERROR(ENOMEM) is returned.
 * NOTE(review): the "goto fail" statements and the "fail:" label are
 * elided from this excerpt. */
3109 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
3113 ret = ff_thread_ref_frame(&dst->tf, &src->tf);
/* motion-vector table: shared via buffer reference, not copied */
3117 dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
3118 if (!dst->tab_mvf_buf)
3120 dst->tab_mvf = src->tab_mvf;
3122 dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
3123 if (!dst->rpl_tab_buf)
3125 dst->rpl_tab = src->rpl_tab;
3127 dst->rpl_buf = av_buffer_ref(src->rpl_buf);
/* plain-value fields copied directly */
3131 dst->poc = src->poc;
3132 dst->ctb_count = src->ctb_count;
3133 dst->window = src->window;
3134 dst->flags = src->flags;
3135 dst->sequence = src->sequence;
/* error path: undo whatever was referenced so far */
3139 ff_hevc_unref_frame(s, dst, ~0);
3140 return AVERROR(ENOMEM);
/* AVCodec.close callback: free everything allocated by
 * hevc_init_context() and during decoding. Also used as the error
 * cleanup path of hevc_init_context(), so every free below must
 * tolerate NULL / partially-initialized state.
 * NOTE(review): some lines (loop braces, final "return 0") are elided
 * from this excerpt. */
3143 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3145 HEVCContext *s = avctx->priv_data;
3146 HEVCLocalContext *lc = s->HEVClc;
3151 av_freep(&s->md5_ctx);
/* per-NAL skipped-bytes bookkeeping arrays */
3153 for(i=0; i < s->nals_allocated; i++) {
3154 av_freep(&s->skipped_bytes_pos_nal[i]);
3156 av_freep(&s->skipped_bytes_pos_size_nal);
3157 av_freep(&s->skipped_bytes_nal);
3158 av_freep(&s->skipped_bytes_pos_nal);
3160 av_freep(&s->cabac_state);
3162 av_frame_free(&s->tmp_frame);
3163 av_frame_free(&s->output_frame);
/* release every DPB slot: unref the picture, then free its AVFrame */
3165 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3166 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3167 av_frame_free(&s->DPB[i].frame);
/* parameter-set buffers (VPS/SPS/PPS) */
3170 for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
3171 av_buffer_unref(&s->vps_list[i]);
3172 for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
3173 av_buffer_unref(&s->sps_list[i]);
3174 for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
3175 av_buffer_unref(&s->pps_list[i]);
3180 av_buffer_unref(&s->current_sps);
/* slice-header side allocations */
3182 av_freep(&s->sh.entry_point_offset);
3183 av_freep(&s->sh.offset);
3184 av_freep(&s->sh.size);
/* per-slice-thread local contexts; slot 0 belongs to the main thread
 * and is freed separately below */
3186 for (i = 1; i < s->threads_number; i++) {
3187 lc = s->HEVClcList[i];
3189 av_freep(&s->HEVClcList[i]);
3190 av_freep(&s->sList[i]);
3193 if (s->HEVClc == s->HEVClcList[0])
3195 av_freep(&s->HEVClcList[0]);
/* raw NAL rbsp buffers */
3197 for (i = 0; i < s->nals_allocated; i++)
3198 av_freep(&s->nals[i].rbsp_buffer);
3200 s->nals_allocated = 0;
/* Allocate all stream-independent decoder state: the main local
 * context, CABAC state, scratch/output frames, one AVFrame per DPB
 * slot and the MD5 context. On any allocation failure the cleanup path
 * calls hevc_decode_free() (which tolerates partial initialization)
 * and returns AVERROR(ENOMEM).
 * NOTE(review): several "goto fail"/NULL checks and the success
 * "return 0" are elided from this excerpt. */
3205 static av_cold int hevc_init_context(AVCodecContext *avctx)
3207 HEVCContext *s = avctx->priv_data;
3212 s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3215 s->HEVClcList[0] = s->HEVClc;
3218 s->cabac_state = av_malloc(HEVC_CONTEXTS);
3219 if (!s->cabac_state)
3222 s->tmp_frame = av_frame_alloc();
3226 s->output_frame = av_frame_alloc();
3227 if (!s->output_frame)
/* pre-allocate an AVFrame for every DPB slot */
3230 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3231 s->DPB[i].frame = av_frame_alloc();
3232 if (!s->DPB[i].frame)
3234 s->DPB[i].tf.f = s->DPB[i].frame;
3237 s->max_ra = INT_MAX;
3239 s->md5_ctx = av_md5_alloc();
3243 ff_bswapdsp_init(&s->bdsp);
3245 s->context_initialized = 1;
/* error path: free whatever was allocated before the failure */
3251 hevc_decode_free(avctx);
3252 return AVERROR(ENOMEM);
/* Frame-threading callback: synchronize the worker context 'dst' with
 * the source context 'src' — DPB picture references, parameter-set
 * buffers, POC/sequence counters and threading parameters.
 * NOTE(review): error-return checks and the guard around the final
 * seq_decode bump (end-of-sequence handling in the full source) are
 * elided from this excerpt. */
3255 static int hevc_update_thread_context(AVCodecContext *dst,
3256 const AVCodecContext *src)
3258 HEVCContext *s = dst->priv_data;
3259 HEVCContext *s0 = src->priv_data;
/* lazily initialize the worker's context on first use */
3262 if (!s->context_initialized) {
3263 ret = hevc_init_context(dst);
/* re-reference every occupied DPB slot from the source context */
3268 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3269 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3270 if (s0->DPB[i].frame->buf[0]) {
3271 ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3277 if (s->sps != s0->sps)
/* copy the parameter-set lists by taking new buffer references */
3279 for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3280 av_buffer_unref(&s->vps_list[i]);
3281 if (s0->vps_list[i]) {
3282 s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3283 if (!s->vps_list[i])
3284 return AVERROR(ENOMEM);
3288 for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3289 av_buffer_unref(&s->sps_list[i]);
3290 if (s0->sps_list[i]) {
3291 s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3292 if (!s->sps_list[i])
3293 return AVERROR(ENOMEM);
3297 for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3298 av_buffer_unref(&s->pps_list[i]);
3299 if (s0->pps_list[i]) {
3300 s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3301 if (!s->pps_list[i])
3302 return AVERROR(ENOMEM);
3306 av_buffer_unref(&s->current_sps);
3307 if (s0->current_sps) {
3308 s->current_sps = av_buffer_ref(s0->current_sps);
3309 if (!s->current_sps)
3310 return AVERROR(ENOMEM);
/* if the active SPS changed, re-derive the SPS-dependent state */
3313 if (s->sps != s0->sps)
3314 ret = set_sps(s, s0->sps);
3316 s->seq_decode = s0->seq_decode;
3317 s->seq_output = s0->seq_output;
3318 s->pocTid0 = s0->pocTid0;
3319 s->max_ra = s0->max_ra;
3322 s->is_nalff = s0->is_nalff;
3323 s->nal_length_size = s0->nal_length_size;
3325 s->threads_number = s0->threads_number;
3326 s->threads_type = s0->threads_type;
/* presumably guarded by an end-of-sequence check in the full source:
 * start a new decode sequence and reset the RASL skip threshold */
3329 s->seq_decode = (s->seq_decode + 1) & 0xff;
3330 s->max_ra = INT_MAX;
/* Parse codec extradata: either hvcC (length-prefixed parameter sets,
 * as carried in MP4/MKV) or raw Annex B NAL units. Sets
 * s->nal_length_size from the hvcC lengthSizeMinusOne field.
 * NOTE(review): the GetByteContext declaration, some error checks and
 * the final "return 0" are elided from this excerpt. */
3336 static int hevc_decode_extradata(HEVCContext *s)
3338 AVCodecContext *avctx = s->avctx;
3342 bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
/* hvcC detection: raw Annex B starts with a 00 00 01 / 00 00 00 01
 * start code, so a nonzero leading byte (or [2] > 1) implies hvcC */
3344 if (avctx->extradata_size > 3 &&
3345 (avctx->extradata[0] || avctx->extradata[1] ||
3346 avctx->extradata[2] > 1)) {
3347 /* It seems the extradata is encoded as hvcC format.
3348 * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3349 * is finalized. When finalized, configurationVersion will be 1 and we
3350 * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3351 int i, j, num_arrays, nal_len_size;
/* skip the fixed hvcC header bytes preceding lengthSizeMinusOne */
3355 bytestream2_skip(&gb, 21);
3356 nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3357 num_arrays = bytestream2_get_byte(&gb);
3359 /* nal units in the hvcC always have length coded with 2 bytes,
3360 * so put a fake nal_length_size = 2 while parsing them */
3361 s->nal_length_size = 2;
3363 /* Decode nal units from hvcC. */
3364 for (i = 0; i < num_arrays; i++) {
3365 int type = bytestream2_get_byte(&gb) & 0x3f;
3366 int cnt = bytestream2_get_be16(&gb);
3368 for (j = 0; j < cnt; j++) {
3369 // +2 for the nal size field
3370 int nalsize = bytestream2_peek_be16(&gb) + 2;
/* bounds check before handing the NAL to the decoder */
3371 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3372 av_log(s->avctx, AV_LOG_ERROR,
3373 "Invalid NAL unit size in extradata.\n");
3374 return AVERROR_INVALIDDATA;
3377 ret = decode_nal_units(s, gb.buffer, nalsize);
3379 av_log(avctx, AV_LOG_ERROR,
3380 "Decoding nal unit %d %d from hvcC failed\n",
3384 bytestream2_skip(&gb, nalsize);
3388 /* Now store right nal length size, that will be used to parse
3390 s->nal_length_size = nal_len_size;
/* otherwise: plain Annex B extradata, decode it directly */
3393 ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
/* AVCodec.init callback: initialize CABAC tables, allocate the context
 * state, choose the threading mode and parse any extradata (VPS/SPS/
 * PPS delivered out of band).
 * NOTE(review): error-return checks and the final "return 0" are
 * elided from this excerpt. */
3400 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3402 HEVCContext *s = avctx->priv_data;
3405 ff_init_cabac_states();
/* frame threading needs per-frame progress reporting */
3407 avctx->internal->allocate_progress = 1;
3409 ret = hevc_init_context(avctx);
3413 s->enable_parallel_tiles = 0;
3414 s->picture_struct = 0;
/* slice threading: one worker context per configured thread */
3416 if(avctx->active_thread_type & FF_THREAD_SLICE)
3417 s->threads_number = avctx->thread_count;
3419 s->threads_number = 1;
3421 if (avctx->extradata_size > 0 && avctx->extradata) {
3422 ret = hevc_decode_extradata(s);
/* extradata failure: tear down what hevc_init_context() built */
3424 hevc_decode_free(avctx);
/* prefer frame threading when multiple threads are available */
3429 if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3430 s->threads_type = FF_THREAD_FRAME;
3432 s->threads_type = FF_THREAD_SLICE;
3437 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3439 HEVCContext *s = avctx->priv_data;
3442 memset(s, 0, sizeof(*s));
3444 ret = hevc_init_context(avctx);
3451 static void hevc_decode_flush(AVCodecContext *avctx)
3453 HEVCContext *s = avctx->priv_data;
3454 ff_hevc_flush_dpb(s);
3455 s->max_ra = INT_MAX;
/* Shorthands for the AVOption table below: field offset into
 * HEVCContext and the common option flags. */
3458 #define OFFSET(x) offsetof(HEVCContext, x)
3459 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
/* Profiles reported through avctx->profile; the FF_PROFILE_UNKNOWN
 * entry terminates the list.
 * NOTE(review): the closing "};" of this table is missing from this
 * excerpt. */
3461 static const AVProfile profiles[] = {
3462 { FF_PROFILE_HEVC_MAIN, "Main" },
3463 { FF_PROFILE_HEVC_MAIN_10, "Main 10" },
3464 { FF_PROFILE_HEVC_MAIN_STILL_PICTURE, "Main Still Picture" },
3465 { FF_PROFILE_HEVC_REXT, "Rext" },
3466 { FF_PROFILE_UNKNOWN },
3469 static const AVOption options[] = {
3470 { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3471 AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3472 { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3473 AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
/* AVClass exposing the decoder's AVOptions and log name.
 * NOTE(review): an ".option = options," member appears to be missing
 * from this excerpt (without it the options table above is never
 * exposed) — confirm against the complete source. */
3477 static const AVClass hevc_decoder_class = {
3478 .class_name = "HEVC decoder",
3479 .item_name = av_default_item_name,
3481 .version = LIBAVUTIL_VERSION_INT,
3484 AVCodec ff_hevc_decoder = {
3486 .long_name = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3487 .type = AVMEDIA_TYPE_VIDEO,
3488 .id = AV_CODEC_ID_HEVC,
3489 .priv_data_size = sizeof(HEVCContext),
3490 .priv_class = &hevc_decoder_class,
3491 .init = hevc_decode_init,
3492 .close = hevc_decode_free,
3493 .decode = hevc_decode_frame,
3494 .flush = hevc_decode_flush,
3495 .update_thread_context = hevc_update_thread_context,
3496 .init_thread_copy = hevc_init_thread_copy,
3497 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3498 CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
3499 .profiles = NULL_IF_CONFIG_SMALL(profiles),