4 * Copyright (C) 2012 - 2013 Guillaume Martres
5 * Copyright (C) 2012 - 2013 Mickael Raulet
6 * Copyright (C) 2012 - 2013 Gildas Cocherel
7 * Copyright (C) 2012 - 2013 Wassim Hamidouche
9 * This file is part of FFmpeg.
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/atomic.h"
27 #include "libavutil/attributes.h"
28 #include "libavutil/common.h"
29 #include "libavutil/display.h"
30 #include "libavutil/internal.h"
31 #include "libavutil/md5.h"
32 #include "libavutil/opt.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/stereo3d.h"
37 #include "bytestream.h"
38 #include "cabac_functions.h"
// Maps an inter-prediction block width (in luma samples, index 2..64) to a
// compact 0..9 index — presumably used to select width-specific DSP/weighting
// functions elsewhere; TODO(review): confirm against the callers of this table.
42 const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
45 * NOTE: Each function hls_foo corresponds to the function foo in the
46 * specification (HLS stands for High Level Syntax).
53 /* free everything allocated by pic_arrays_init() */
54 static void pic_arrays_free(HEVCContext *s)
// Releases all per-frame arrays owned by the context. av_freep() NULLs the
// pointers and av_buffer_pool_uninit() accepts already-NULL pools, so this is
// safe to call on partially-initialized state (e.g. after an allocation
// failure inside pic_arrays_init()).
57     av_freep(&s->deblock);
59     av_freep(&s->skip_flag);
60     av_freep(&s->tab_ct_depth);
62     av_freep(&s->tab_ipm);
63     av_freep(&s->cbf_luma);
66     av_freep(&s->qp_y_tab);
67     av_freep(&s->tab_slice_address);
68     av_freep(&s->filter_slice_edges);
// Deblocking boundary-strength maps (allocated per 4x4 grid position)
70     av_freep(&s->horizontal_bs);
71     av_freep(&s->vertical_bs);
// Slice-header entry-point bookkeeping (allocated in hls_slice_header())
73     av_freep(&s->sh.entry_point_offset);
74     av_freep(&s->sh.size);
75     av_freep(&s->sh.offset);
// Buffer pools backing per-frame MvField / RefPicListTab allocations
77     av_buffer_pool_uninit(&s->tab_mvf_pool);
78     av_buffer_pool_uninit(&s->rpl_tab_pool);
81 /* allocate arrays that depend on frame dimensions */
// Allocates every per-frame lookup table sized from the given SPS.
// Returns 0 on success or AVERROR(ENOMEM); on failure the cleanup path
// (elided here) is expected to free what was already allocated.
82 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
84     int log2_min_cb_size = sps->log2_min_cb_size;
85     int width = sps->width;
86     int height = sps->height;
// +1 in each dimension gives a guard row/column for neighbour lookups
87     int pic_size_in_ctb = ((width >> log2_min_cb_size) + 1) *
88     ((height >> log2_min_cb_size) + 1);
89     int ctb_count = sps->ctb_width * sps->ctb_height;
90     int min_pu_size = sps->min_pu_width * sps->min_pu_height;
// Boundary-strength grid resolution is 4x4 luma samples (hence >> 2)
92     s->bs_width = (width >> 2) + 1;
93     s->bs_height = (height >> 2) + 1;
// Per-CTB SAO and deblocking parameters (zero-initialized)
95     s->sao = av_mallocz_array(ctb_count, sizeof(*s->sao));
96     s->deblock = av_mallocz_array(ctb_count, sizeof(*s->deblock));
97     if (!s->sao || !s->deblock)
// Per-minimum-CB flags: skip flag and coding-tree depth
100     s->skip_flag = av_malloc(sps->min_cb_height * sps->min_cb_width);
101     s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
102     if (!s->skip_flag || !s->tab_ct_depth)
// Per-TB luma CBF, per-PU intra prediction mode and PCM flags
105     s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
106     s->tab_ipm = av_mallocz(min_pu_size);
107     s->is_pcm = av_malloc((sps->min_pu_width + 1) * (sps->min_pu_height + 1));
108     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
111     s->filter_slice_edges = av_malloc(ctb_count);
112     s->tab_slice_address = av_malloc_array(pic_size_in_ctb,
113     sizeof(*s->tab_slice_address));
114     s->qp_y_tab = av_malloc_array(pic_size_in_ctb,
115     sizeof(*s->qp_y_tab));
116     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
// Deblocking boundary-strength maps, one byte per 4x4 position
119     s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
120     s->vertical_bs = av_mallocz_array(s->bs_width, s->bs_height);
121     if (!s->horizontal_bs || !s->vertical_bs)
// Pools so per-frame MvField/RefPicListTab buffers can be recycled
124     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
126     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
128     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
135     return AVERROR(ENOMEM);
// Parses pred_weight_table() from the slice header (H.265 section 7.3.6.3):
// explicit luma/chroma weights and offsets for weighted (bi)prediction,
// first for reference list L0, then (for B slices) list L1.
// NOTE(review): bit order is normative — do not reorder any read.
138 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
142     uint8_t luma_weight_l0_flag[16];
143     uint8_t chroma_weight_l0_flag[16];
144     uint8_t luma_weight_l1_flag[16];
145     uint8_t chroma_weight_l1_flag[16];
147     s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
// Chroma denom is coded as a delta against the luma denom, clipped to 0..7
148     if (s->sps->chroma_format_idc != 0) {
149         int delta = get_se_golomb(gb);
150         s->sh.chroma_log2_weight_denom = av_clip(s->sh.luma_log2_weight_denom + delta, 0, 7);
// --- List L0: per-ref "explicit weight present" flags; absent weights get
// the spec-defined defaults (weight = 1 << denom, offset = 0).
153     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
154         luma_weight_l0_flag[i] = get_bits1(gb);
155         if (!luma_weight_l0_flag[i]) {
156             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
157             s->sh.luma_offset_l0[i] = 0;
160     if (s->sps->chroma_format_idc != 0) {
161         for (i = 0; i < s->sh.nb_refs[L0]; i++)
162             chroma_weight_l0_flag[i] = get_bits1(gb);
164         for (i = 0; i < s->sh.nb_refs[L0]; i++)
165             chroma_weight_l0_flag[i] = 0;
// Second pass: read the actual deltas for refs whose flag was set
167     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
168         if (luma_weight_l0_flag[i]) {
169             int delta_luma_weight_l0 = get_se_golomb(gb);
170             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
171             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
173         if (chroma_weight_l0_flag[i]) {
// j = 0 is Cb, j = 1 is Cr; offset derivation follows eq. 7-34 of the spec
174             for (j = 0; j < 2; j++) {
175                 int delta_chroma_weight_l0 = get_se_golomb(gb);
176                 int delta_chroma_offset_l0 = get_se_golomb(gb);
177                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
178                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
179                                                        >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
182             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
183             s->sh.chroma_offset_l0[i][0] = 0;
184             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
185             s->sh.chroma_offset_l0[i][1] = 0;
// --- List L1: identical structure, only present for B slices
188     if (s->sh.slice_type == B_SLICE) {
189         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
190             luma_weight_l1_flag[i] = get_bits1(gb);
191             if (!luma_weight_l1_flag[i]) {
192                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
193                 s->sh.luma_offset_l1[i] = 0;
196         if (s->sps->chroma_format_idc != 0) {
197             for (i = 0; i < s->sh.nb_refs[L1]; i++)
198                 chroma_weight_l1_flag[i] = get_bits1(gb);
200             for (i = 0; i < s->sh.nb_refs[L1]; i++)
201                 chroma_weight_l1_flag[i] = 0;
203         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
204             if (luma_weight_l1_flag[i]) {
205                 int delta_luma_weight_l1 = get_se_golomb(gb);
206                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
207                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
209             if (chroma_weight_l1_flag[i]) {
210                 for (j = 0; j < 2; j++) {
211                     int delta_chroma_weight_l1 = get_se_golomb(gb);
212                     int delta_chroma_offset_l1 = get_se_golomb(gb);
213                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
214                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
215                                                            >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
218                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
219                 s->sh.chroma_offset_l1[i][0] = 0;
220                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
221                 s->sh.chroma_offset_l1[i][1] = 0;
// Parses the long-term reference picture set from the slice header
// (H.265 section 7.3.6.1). Fills rps->poc[]/used[] from SPS-signalled
// candidates (first nb_sps entries) and slice-signalled entries (nb_sh).
// Returns 0, or AVERROR_INVALIDDATA if the counts exceed rps->poc capacity.
227 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
229     const HEVCSPS *sps = s->sps;
230     int max_poc_lsb = 1 << sps->log2_max_poc_lsb;
231     int prev_delta_msb = 0;
232     unsigned int nb_sps = 0, nb_sh;
236     if (!sps->long_term_ref_pics_present_flag)
239     if (sps->num_long_term_ref_pics_sps > 0)
240         nb_sps = get_ue_golomb_long(gb);
241     nb_sh = get_ue_golomb_long(gb);
// uint64_t cast avoids integer overflow of the sum before the bounds check
243     if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
244         return AVERROR_INVALIDDATA;
246     rps->nb_refs = nb_sh + nb_sps;
248     for (i = 0; i < rps->nb_refs; i++) {
249         uint8_t delta_poc_msb_present;
// First nb_sps entries are indexed into the SPS candidate list...
252         uint8_t lt_idx_sps = 0;
254         if (sps->num_long_term_ref_pics_sps > 1)
255             lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
257         rps->poc[i] = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
258         rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
// ...the remaining entries carry their POC LSB explicitly in the slice header
260         rps->poc[i] = get_bits(gb, sps->log2_max_poc_lsb);
261         rps->used[i] = get_bits1(gb);
264         delta_poc_msb_present = get_bits1(gb);
265         if (delta_poc_msb_present) {
266             int delta = get_ue_golomb_long(gb);
// Per spec, the MSB delta is cumulative within each of the two groups
// (SPS-derived and slice-signalled), hence the reset at i == nb_sps
268             if (i && i != nb_sps)
269                 delta += prev_delta_msb;
271             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
272             prev_delta_msb = delta;
// Allocates the temporary frame used by the SAO filter, over-allocated by
// 2 samples in each dimension so the data pointers can be offset by one
// row + one pixel, giving the filter a 1-sample border on every plane.
279 static int get_buffer_sao(HEVCContext *s, AVFrame *frame, const HEVCSPS *sps)
283     frame->width = s->avctx->coded_width + 2;
284     frame->height = s->avctx->coded_height + 2;
285     if ((ret = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
287     for (i = 0; frame->data[i]; i++) {
// Skip one row and one (possibly multi-byte, per pixel_shift) pixel
288         int offset = frame->linesize[i] + (1 << sps->pixel_shift);
289         frame->data[i] += offset;
// Restore the nominal dimensions now that the border is hidden in the offset
291     frame->width = s->avctx->coded_width;
292     frame->height = s->avctx->coded_height;
// Activates a new SPS: (re)allocates the dimension-dependent arrays, exports
// the stream properties (size, pix_fmt, colorimetry, SAR, framerate) to the
// AVCodecContext and re-initializes the DSP/prediction function tables for
// the SPS bit depth. Returns 0 or a negative AVERROR.
297 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
300     unsigned int num = 0, den = 0;
303     ret = pic_arrays_init(s, sps);
307     s->avctx->coded_width = sps->width;
308     s->avctx->coded_height = sps->height;
309     s->avctx->width = sps->output_width;
310     s->avctx->height = sps->output_height;
311     s->avctx->pix_fmt = sps->pix_fmt;
312     s->avctx->has_b_frames = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
314     ff_set_sar(s->avctx, sps->vui.sar);
// Colorimetry from VUI when present, otherwise spec-mandated defaults
316     if (sps->vui.video_signal_type_present_flag)
317         s->avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
320         s->avctx->color_range = AVCOL_RANGE_MPEG;
322     if (sps->vui.colour_description_present_flag) {
323         s->avctx->color_primaries = sps->vui.colour_primaries;
324         s->avctx->color_trc = sps->vui.transfer_characteristic;
325         s->avctx->colorspace = sps->vui.matrix_coeffs;
327         s->avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
328         s->avctx->color_trc = AVCOL_TRC_UNSPECIFIED;
329         s->avctx->colorspace = AVCOL_SPC_UNSPECIFIED;
// DSP tables depend on bit depth, so they must be rebuilt per SPS
332     ff_hevc_pred_init(&s->hpc, sps->bit_depth);
333     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
334     ff_videodsp_init (&s->vdsp, sps->bit_depth);
336     if (sps->sao_enabled) {
337         av_frame_unref(s->tmp_frame);
338         ret = get_buffer_sao(s, s->tmp_frame, sps);
339         s->sao_frame = s->tmp_frame;
343     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
// Framerate: VPS timing info takes precedence over VUI timing info
345     if (s->vps->vps_timing_info_present_flag) {
346         num = s->vps->vps_num_units_in_tick;
347         den = s->vps->vps_time_scale;
348     } else if (sps->vui.vui_timing_info_present_flag) {
349         num = sps->vui.vui_num_units_in_tick;
350         den = sps->vui.vui_time_scale;
353     if (num != 0 && den != 0)
354         av_reduce(&s->avctx->framerate.den, &s->avctx->framerate.num,
// Parses slice_segment_header() (H.265 section 7.3.6.1) into s->sh and
// activates the referenced PPS/SPS. Returns 0 or AVERROR_INVALIDDATA /
// AVERROR(ENOMEM). NOTE(review): every get_bits*/golomb read below is in
// normative bitstream order — statements must not be reordered.
365 static int hls_slice_header(HEVCContext *s)
367     GetBitContext *gb = &s->HEVClc->gb;
368     SliceHeader *sh = &s->sh;
// --- Slice segment address and parameter-set activation ---
372     sh->first_slice_in_pic_flag = get_bits1(gb);
// A new IDR/BLA picture starts a new coded sequence: flush references
373     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
374         s->seq_decode = (s->seq_decode + 1) & 0xff;
377         ff_hevc_clear_refs(s);
379         sh->no_output_of_prior_pics_flag = 0;
381         sh->no_output_of_prior_pics_flag = get_bits1(gb);
383     sh->pps_id = get_ue_golomb_long(gb);
384     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
385         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
386         return AVERROR_INVALIDDATA;
// All slices of one picture must reference the same PPS
388     if (!sh->first_slice_in_pic_flag &&
389         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
390         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
391         return AVERROR_INVALIDDATA;
393     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
394     if (s->nal_unit_type == NAL_CRA_NUT && s->last_eos == 1)
395         sh->no_output_of_prior_pics_flag = 1;
// SPS change: re-run set_sps() and start a new sequence
397     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
398         const HEVCSPS* last_sps = s->sps;
399         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
400         if (last_sps && IS_IRAP(s) && s->nal_unit_type != NAL_CRA_NUT) {
401             if (s->sps->width != last_sps->width || s->sps->height != last_sps->height ||
402                 s->sps->temporal_layer[s->sps->max_sub_layers - 1].max_dec_pic_buffering !=
403                 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
404                 sh->no_output_of_prior_pics_flag = 0;
406         ff_hevc_clear_refs(s);
407         ret = set_sps(s, s->sps);
411         s->seq_decode = (s->seq_decode + 1) & 0xff;
415     s->avctx->profile = s->sps->ptl.general_ptl.profile_idc;
416     s->avctx->level = s->sps->ptl.general_ptl.level_idc;
418     sh->dependent_slice_segment_flag = 0;
419     if (!sh->first_slice_in_pic_flag) {
420         int slice_address_length;
422         if (s->pps->dependent_slice_segments_enabled_flag)
423             sh->dependent_slice_segment_flag = get_bits1(gb);
// Address is coded with ceil(log2(#CTBs)) bits
425         slice_address_length = av_ceil_log2(s->sps->ctb_width *
427         sh->slice_segment_addr = get_bits(gb, slice_address_length);
428         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
429             av_log(s->avctx, AV_LOG_ERROR,
430                    "Invalid slice segment address: %u.\n",
431                    sh->slice_segment_addr);
432             return AVERROR_INVALIDDATA;
435         if (!sh->dependent_slice_segment_flag) {
436             sh->slice_addr = sh->slice_segment_addr;
440         sh->slice_segment_addr = sh->slice_addr = 0;
442         s->slice_initialized = 0;
// --- Independent slice segment: full header follows ---
445     if (!sh->dependent_slice_segment_flag) {
446         s->slice_initialized = 0;
448         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
449             skip_bits(gb, 1); // slice_reserved_undetermined_flag[]
451         sh->slice_type = get_ue_golomb_long(gb);
452         if (!(sh->slice_type == I_SLICE ||
453               sh->slice_type == P_SLICE ||
454               sh->slice_type == B_SLICE)) {
455             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
457             return AVERROR_INVALIDDATA;
459         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
460             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
461             return AVERROR_INVALIDDATA;
464         // when flag is not present, picture is inferred to be output
465         sh->pic_output_flag = 1;
466         if (s->pps->output_flag_present_flag)
467             sh->pic_output_flag = get_bits1(gb);
469         if (s->sps->separate_colour_plane_flag)
470             sh->colour_plane_id = get_bits(gb, 2);
// --- POC and reference picture sets (non-IDR path, presumably gated by
// an elided !IS_IDR check — TODO confirm against upstream) ---
473         int short_term_ref_pic_set_sps_flag, poc;
475         sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
476         poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
477         if (!sh->first_slice_in_pic_flag && poc != s->poc) {
478             av_log(s->avctx, AV_LOG_WARNING,
479                    "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
480             if (s->avctx->err_recognition & AV_EF_EXPLODE)
481                 return AVERROR_INVALIDDATA;
// Short-term RPS: either coded inline in the slice header...
486         short_term_ref_pic_set_sps_flag = get_bits1(gb);
487         if (!short_term_ref_pic_set_sps_flag) {
488             ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
492             sh->short_term_rps = &sh->slice_rps;
// ...or selected by index from the SPS-signalled candidate sets
494             int numbits, rps_idx;
496             if (!s->sps->nb_st_rps) {
497                 av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
498                 return AVERROR_INVALIDDATA;
501             numbits = av_ceil_log2(s->sps->nb_st_rps);
502             rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
503             sh->short_term_rps = &s->sps->st_rps[rps_idx];
506         ret = decode_lt_rps(s, &sh->long_term_rps, gb);
508             av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
509             if (s->avctx->err_recognition & AV_EF_EXPLODE)
510                 return AVERROR_INVALIDDATA;
513         if (s->sps->sps_temporal_mvp_enabled_flag)
514             sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
516             sh->slice_temporal_mvp_enabled_flag = 0;
518         s->sh.short_term_rps = NULL;
// Sub-layer non-reference NAL types do not update the output POC tracking
523         if (s->temporal_id == 0 &&
524             s->nal_unit_type != NAL_TRAIL_N &&
525             s->nal_unit_type != NAL_TSA_N &&
526             s->nal_unit_type != NAL_STSA_N &&
527             s->nal_unit_type != NAL_RADL_N &&
528             s->nal_unit_type != NAL_RADL_R &&
529             s->nal_unit_type != NAL_RASL_N &&
530             s->nal_unit_type != NAL_RASL_R)
// --- SAO enable flags: one for luma, one shared by both chroma planes ---
533         if (s->sps->sao_enabled) {
534             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
535             sh->slice_sample_adaptive_offset_flag[1] =
536             sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
538             sh->slice_sample_adaptive_offset_flag[0] = 0;
539             sh->slice_sample_adaptive_offset_flag[1] = 0;
540             sh->slice_sample_adaptive_offset_flag[2] = 0;
// --- Reference list sizes and modification (P/B slices only) ---
543         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
544         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
547             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
548             if (sh->slice_type == B_SLICE)
549                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
551             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
552                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
553                 if (sh->slice_type == B_SLICE)
554                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
556             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
557                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
558                        sh->nb_refs[L0], sh->nb_refs[L1]);
559                 return AVERROR_INVALIDDATA;
562             sh->rpl_modification_flag[0] = 0;
563             sh->rpl_modification_flag[1] = 0;
564             nb_refs = ff_hevc_frame_nb_refs(s);
566                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
567                 return AVERROR_INVALIDDATA;
570             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
571                 sh->rpl_modification_flag[0] = get_bits1(gb);
572                 if (sh->rpl_modification_flag[0]) {
573                     for (i = 0; i < sh->nb_refs[L0]; i++)
574                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
577                 if (sh->slice_type == B_SLICE) {
578                     sh->rpl_modification_flag[1] = get_bits1(gb);
579                     if (sh->rpl_modification_flag[1] == 1)
580                         for (i = 0; i < sh->nb_refs[L1]; i++)
581                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
585             if (sh->slice_type == B_SLICE)
586                 sh->mvd_l1_zero_flag = get_bits1(gb);
588             if (s->pps->cabac_init_present_flag)
589                 sh->cabac_init_flag = get_bits1(gb);
591                 sh->cabac_init_flag = 0;
// Collocated picture selection for temporal MVP
593             sh->collocated_ref_idx = 0;
594             if (sh->slice_temporal_mvp_enabled_flag) {
595                 sh->collocated_list = L0;
596                 if (sh->slice_type == B_SLICE)
597                     sh->collocated_list = !get_bits1(gb);
599                 if (sh->nb_refs[sh->collocated_list] > 1) {
600                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
601                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
602                         av_log(s->avctx, AV_LOG_ERROR,
603                                "Invalid collocated_ref_idx: %d.\n",
604                                sh->collocated_ref_idx);
605                         return AVERROR_INVALIDDATA;
610             if ((s->pps->weighted_pred_flag && sh->slice_type == P_SLICE) ||
611                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
612                 pred_weight_table(s, gb);
// five_minus_max_num_merge_cand; valid result is 1..5
615             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
616             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
617                 av_log(s->avctx, AV_LOG_ERROR,
618                        "Invalid number of merging MVP candidates: %d.\n",
619                        sh->max_num_merge_cand);
620                 return AVERROR_INVALIDDATA;
// --- QP offsets ---
624         sh->slice_qp_delta = get_se_golomb(gb);
626         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
627             sh->slice_cb_qp_offset = get_se_golomb(gb);
628             sh->slice_cr_qp_offset = get_se_golomb(gb);
630             sh->slice_cb_qp_offset = 0;
631             sh->slice_cr_qp_offset = 0;
634         if (s->pps->chroma_qp_offset_list_enabled_flag)
635             sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
637             sh->cu_chroma_qp_offset_enabled_flag = 0;
// --- Deblocking filter overrides (fall back to PPS values) ---
639         if (s->pps->deblocking_filter_control_present_flag) {
640             int deblocking_filter_override_flag = 0;
642             if (s->pps->deblocking_filter_override_enabled_flag)
643                 deblocking_filter_override_flag = get_bits1(gb);
645             if (deblocking_filter_override_flag) {
646                 sh->disable_deblocking_filter_flag = get_bits1(gb);
647                 if (!sh->disable_deblocking_filter_flag) {
// Offsets are coded divided by 2 in the bitstream
648                     sh->beta_offset = get_se_golomb(gb) * 2;
649                     sh->tc_offset = get_se_golomb(gb) * 2;
652                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
653                 sh->beta_offset = s->pps->beta_offset;
654                 sh->tc_offset = s->pps->tc_offset;
657             sh->disable_deblocking_filter_flag = 0;
662         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
663             (sh->slice_sample_adaptive_offset_flag[0] ||
664              sh->slice_sample_adaptive_offset_flag[1] ||
665              !sh->disable_deblocking_filter_flag)) {
666             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
668             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
// A dependent slice segment is only valid after an independent one
670     } else if (!s->slice_initialized) {
671         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
672         return AVERROR_INVALIDDATA;
// --- Entry points for tiles / WPP ---
675     sh->num_entry_point_offsets = 0;
676     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
677         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
678         if (sh->num_entry_point_offsets > 0) {
679             int offset_len = get_ue_golomb_long(gb) + 1;
// Offsets wider than 16 bits are read in 16-bit segments plus a remainder
680             int segments = offset_len >> 4;
681             int rest = (offset_len & 15);
682             av_freep(&sh->entry_point_offset);
683             av_freep(&sh->offset);
685             sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
686             sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
687             sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
688             if (!sh->entry_point_offset || !sh->offset || !sh->size) {
689                 sh->num_entry_point_offsets = 0;
690                 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
691                 return AVERROR(ENOMEM);
693             for (i = 0; i < sh->num_entry_point_offsets; i++) {
695                 for (j = 0; j < segments; j++) {
697                     val += get_bits(gb, 16);
701                     val += get_bits(gb, rest);
703                 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
// Parallel tile decoding not implemented: force single-threaded decode
705             if (s->threads_number > 1 && (s->pps->num_tile_rows > 1 || s->pps->num_tile_columns > 1)) {
706                 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
707                 s->threads_number = 1;
709                 s->enable_parallel_tiles = 0;
711         s->enable_parallel_tiles = 0;
// --- Slice header extension (skipped, but bounds-checked) ---
714     if (s->pps->slice_header_extension_present_flag) {
715         unsigned int length = get_ue_golomb_long(gb);
716         if (length*8LL > get_bits_left(gb)) {
717             av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
718             return AVERROR_INVALIDDATA;
720         for (i = 0; i < length; i++)
721             skip_bits(gb, 8); // slice_header_extension_data_byte
724     // Inferred parameters
725     sh->slice_qp = 26U + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
726     if (sh->slice_qp > 51 ||
727         sh->slice_qp < -s->sps->qp_bd_offset) {
728         av_log(s->avctx, AV_LOG_ERROR,
729                "The slice_qp %d is outside the valid range "
732                -s->sps->qp_bd_offset);
733         return AVERROR_INVALIDDATA;
736     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
// First slice segment of a picture cannot be dependent
738     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
739         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
740         return AVERROR_INVALIDDATA;
743     if (get_bits_left(gb) < 0) {
744         av_log(s->avctx, AV_LOG_ERROR,
745                "Overread slice header by %d bits\n", -get_bits_left(gb));
746         return AVERROR_INVALIDDATA;
749     s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
// Without per-CU QP deltas the slice QP applies to the whole slice
751     if (!s->pps->cu_qp_delta_enabled_flag)
752         s->HEVClc->qp_y = s->sh.slice_qp;
754     s->slice_initialized = 1;
755     s->HEVClc->tu.cu_qp_offset_cb = 0;
756     s->HEVClc->tu.cu_qp_offset_cr = 0;
// Access the per-CTB entry of a raster-ordered table at CTB column x, row y.
761 #define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
// Assign a SAO parameter for the current CTB, honouring the SAO merge flags:
// inherit from the left or above neighbour when the corresponding merge flag
// is set, otherwise use the freshly-decoded value. Relies on rx/ry, sao,
// sao_merge_left_flag and sao_merge_up_flag being in scope at the call site.
763 #define SET_SAO(elem, value) \
765     if (!sao_merge_up_flag && !sao_merge_left_flag) \
767     else if (sao_merge_left_flag) \
768         sao->elem = CTB(s->sao, rx-1, ry).elem; \
769     else if (sao_merge_up_flag) \
770         sao->elem = CTB(s->sao, rx, ry-1).elem; \
// Decodes sao() syntax (H.265 section 7.3.8.3) for the CTB at raster
// position (rx, ry): merge flags, per-component type, offsets and band
// position / edge class, then derives the signed offset_val[] table used
// by the SAO filter.
775 static void hls_sao_param(HEVCContext *s, int rx, int ry)
777     HEVCLocalContext *lc = s->HEVClc;
778     int sao_merge_left_flag = 0;
779     int sao_merge_up_flag = 0;
780     SAOParams *sao = &CTB(s->sao, rx, ry);
783     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
784         s->sh.slice_sample_adaptive_offset_flag[1]) {
// Merge flags are only coded when the corresponding neighbour is available
786         if (lc->ctb_left_flag)
787             sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
789         if (ry > 0 && !sao_merge_left_flag) {
791             sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
// c_idx: 0 = luma, 1 = Cb, 2 = Cr
795     for (c_idx = 0; c_idx < 3; c_idx++) {
796         int log2_sao_offset_scale = c_idx == 0 ? s->pps->log2_sao_offset_scale_luma :
797                                                  s->pps->log2_sao_offset_scale_chroma;
799         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
800             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
// Cr shares type/class with Cb — only one set is coded for chroma
805             sao->type_idx[2] = sao->type_idx[1];
806             sao->eo_class[2] = sao->eo_class[1];
808         SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
811         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
814         for (i = 0; i < 4; i++)
815             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
// Band offsets carry explicit signs; edge offsets get signs from the class
817         if (sao->type_idx[c_idx] == SAO_BAND) {
818             for (i = 0; i < 4; i++) {
819                 if (sao->offset_abs[c_idx][i]) {
820                     SET_SAO(offset_sign[c_idx][i],
821                             ff_hevc_sao_offset_sign_decode(s));
823                     sao->offset_sign[c_idx][i] = 0;
826             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
827         } else if (c_idx != 2) {
828             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
831         // Inferred parameters
832         sao->offset_val[c_idx][0] = 0;
833         for (i = 0; i < 4; i++) {
834             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
835             if (sao->type_idx[c_idx] == SAO_EDGE) {
837                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
838             } else if (sao->offset_sign[c_idx][i]) {
839                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
// Range-extension offset scaling (log2_sao_offset_scale_* from the PPS)
841             sao->offset_val[c_idx][i + 1] <<= log2_sao_offset_scale;
// Decodes cross_comp_pred() (range extensions): sets lc->tu.res_scale_val,
// the scale applied to luma residuals when predicting chroma residuals,
// as ±2^(log2_res_scale_abs_plus1 - 1), or 0 when not present.
// idx selects the chroma component (presumably 0 = Cb, 1 = Cr — confirm
// against ff_hevc_log2_res_scale_abs()).
849 static int hls_cross_component_pred(HEVCContext *s, int idx) {
850     HEVCLocalContext *lc = s->HEVClc;
851     int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
853     if (log2_res_scale_abs_plus1 != 0) {
854         int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
// (1 - 2*sign) maps the flag {0,1} to a multiplier {+1,-1}
855         lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
856                                (1 - 2 * res_scale_sign_flag);
858         lc->tu.res_scale_val = 0;
// Decodes one transform_unit() (H.265 section 7.3.8.10): performs intra
// prediction for the covered luma/chroma blocks, parses CU QP delta and
// per-CU chroma QP offsets, decodes residual coefficients for each plane
// and applies cross-component prediction where enabled.
// For 4:2:0/4:2:2, chroma of an 8x8 luma TU split (log2_trafo_size == 2)
// is handled once at blk_idx == 3 using the xBase/yBase coordinates.
// Returns 0 or AVERROR_INVALIDDATA.
865 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
866                               int xBase, int yBase, int cb_xBase, int cb_yBase,
867                               int log2_cb_size, int log2_trafo_size,
868                               int trafo_depth, int blk_idx,
869                               int cbf_luma, int *cbf_cb, int *cbf_cr)
871     HEVCLocalContext *lc = s->HEVClc;
// Chroma TB size, derived via the horizontal chroma shift
872     const int log2_trafo_size_c = log2_trafo_size - s->sps->hshift[1];
875     if (lc->cu.pred_mode == MODE_INTRA) {
876         int trafo_size = 1 << log2_trafo_size;
877         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
879         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
// Any coded residual? (4:2:2 has two chroma TBs per luma TB, hence index 1)
882     if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
883         (s->sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
884         int scan_idx = SCAN_DIAG;
885         int scan_idx_c = SCAN_DIAG;
886         int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
887                          (s->sps->chroma_format_idc == 2 &&
888                           (cbf_cb[1] || cbf_cr[1]));
// cu_qp_delta is coded at most once per quantization group
890         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
891             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
892             if (lc->tu.cu_qp_delta != 0)
893                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
894                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
895             lc->tu.is_cu_qp_delta_coded = 1;
897             if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
898                 lc->tu.cu_qp_delta > (25 + s->sps->qp_bd_offset / 2)) {
899                 av_log(s->avctx, AV_LOG_ERROR,
900                        "The cu_qp_delta %d is outside the valid range "
903                        -(26 + s->sps->qp_bd_offset / 2),
904                        (25 + s->sps->qp_bd_offset / 2));
905                 return AVERROR_INVALIDDATA;
908             ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
// Per-CU chroma QP offset (PPS range-extension tool), coded once per CU
911         if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
912             !lc->cu.cu_transquant_bypass_flag && !lc->tu.is_cu_chroma_qp_offset_coded) {
913             int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
914             if (cu_chroma_qp_offset_flag) {
915                 int cu_chroma_qp_offset_idx = 0;
916                 if (s->pps->chroma_qp_offset_list_len_minus1 > 0) {
917                     cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
918                     av_log(s->avctx, AV_LOG_ERROR,
919                            "cu_chroma_qp_offset_idx not yet tested.\n");
921                 lc->tu.cu_qp_offset_cb = s->pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
922                 lc->tu.cu_qp_offset_cr = s->pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
924                 lc->tu.cu_qp_offset_cb = 0;
925                 lc->tu.cu_qp_offset_cr = 0;
927             lc->tu.is_cu_chroma_qp_offset_coded = 1;
// Mode-dependent coefficient scan for small intra TBs (spec table 8-10):
// near-horizontal intra modes scan vertically and vice versa
930         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
931             if (lc->tu.intra_pred_mode >= 6 &&
932                 lc->tu.intra_pred_mode <= 14) {
933                 scan_idx = SCAN_VERT;
934             } else if (lc->tu.intra_pred_mode >= 22 &&
935                        lc->tu.intra_pred_mode <= 30) {
936                 scan_idx = SCAN_HORIZ;
939             if (lc->tu.intra_pred_mode_c >= 6 &&
940                 lc->tu.intra_pred_mode_c <= 14) {
941                 scan_idx_c = SCAN_VERT;
942             } else if (lc->tu.intra_pred_mode_c >= 22 &&
943                        lc->tu.intra_pred_mode_c <= 30) {
944                 scan_idx_c = SCAN_HORIZ;
951             ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
// Chroma residual is coded at this level unless the luma TB is 4x4
// (then it is deferred to blk_idx == 3; see the branch below)
952         if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
953             int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
954             int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
955             lc->tu.cross_pf = (s->pps->cross_component_prediction_enabled_flag && cbf_luma &&
956                                (lc->cu.pred_mode == MODE_INTER ||
957                                 (lc->tu.chroma_mode_c == 4)));
959             if (lc->tu.cross_pf) {
960                 hls_cross_component_pred(s, 0);
// 4:2:2 codes two vertically stacked chroma TBs per luma TB
962             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
963                 if (lc->cu.pred_mode == MODE_INTRA) {
964                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
965                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
968                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
969                                                 log2_trafo_size_c, scan_idx_c, 1);
// Cross-component prediction for Cb: add scaled luma residual
// (coeffs_y was stashed in edge_emu_buffer by the luma pass)
971             if (lc->tu.cross_pf) {
972                 ptrdiff_t stride = s->frame->linesize[1];
973                 int hshift = s->sps->hshift[1];
974                 int vshift = s->sps->vshift[1];
975                 int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
976                 int16_t *coeffs = (int16_t*)lc->edge_emu_buffer2;
977                 int size = 1 << log2_trafo_size_c;
979                 uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
980                                                   ((x0 >> hshift) << s->sps->pixel_shift)];
981                 for (i = 0; i < (size * size); i++) {
982                     coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
984                 s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
988             if (lc->tu.cross_pf) {
989                 hls_cross_component_pred(s, 1);
// Same sequence for Cr (plane index 2)
991             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
992                 if (lc->cu.pred_mode == MODE_INTRA) {
993                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
994                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
997                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
998                                                 log2_trafo_size_c, scan_idx_c, 2);
1000             if (lc->tu.cross_pf) {
1001                 ptrdiff_t stride = s->frame->linesize[2];
1002                 int hshift = s->sps->hshift[2];
1003                 int vshift = s->sps->vshift[2];
1004                 int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1005                 int16_t *coeffs = (int16_t*)lc->edge_emu_buffer2;
1006                 int size = 1 << log2_trafo_size_c;
1008                 uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1009                                                   ((x0 >> hshift) << s->sps->pixel_shift)];
1010                 for (i = 0; i < (size * size); i++) {
1011                     coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1013                 s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
// Deferred chroma for a split 8x8 luma TU: done once on the last sub-TB,
// using the parent block's base coordinates
1016         } else if (blk_idx == 3) {
1017             int trafo_size_h = 1 << (log2_trafo_size + 1);
1018             int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
1019             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1020                 if (lc->cu.pred_mode == MODE_INTRA) {
1021                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1022                                                     trafo_size_h, trafo_size_v);
1023                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1026                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1027                                                 log2_trafo_size, scan_idx_c, 1);
1029             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1030                 if (lc->cu.pred_mode == MODE_INTRA) {
1031                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1032                                                     trafo_size_h, trafo_size_v);
1033                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1036                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1037                                                 log2_trafo_size, scan_idx_c, 2);
// No residual at all: intra chroma prediction still has to run
1040     } else if (lc->cu.pred_mode == MODE_INTRA) {
1041         if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
1042             int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
1043             int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
1044             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1045             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1046             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1047             if (s->sps->chroma_format_idc == 2) {
1048                 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1049                                                 trafo_size_h, trafo_size_v);
1050                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1051                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1053         } else if (blk_idx == 3) {
1054             int trafo_size_h = 1 << (log2_trafo_size + 1);
1055             int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
1056             ff_hevc_set_neighbour_available(s, xBase, yBase,
1057                                             trafo_size_h, trafo_size_v);
1058             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1059             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1060             if (s->sps->chroma_format_idc == 2) {
1061                 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1062                                                 trafo_size_h, trafo_size_v);
1063                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1064                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
// Mark every min-PU position covered by the CU at (x0, y0) in s->is_pcm with
// value 2, clamped to the picture edge via x_end/y_end.
// NOTE(review): value 2 presumably means "transquant-bypass, skip deblocking"
// (callers invoke this under cu_transquant_bypass_flag) — confirm against the
// deblocking filter's is_pcm consumers.
// NOTE(review): this extract is missing interior lines (opening brace, the
// declarations of i/j) — numbering gaps 1073, 1076, 1080-1081 in the original.
1072 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1074 int cb_size = 1 << log2_cb_size;
1075 int log2_min_pu_size = s->sps->log2_min_pu_size;
1077 int min_pu_width = s->sps->min_pu_width;
1078 int x_end = FFMIN(x0 + cb_size, s->sps->width);
1079 int y_end = FFMIN(y0 + cb_size, s->sps->height);
1082 for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1083 for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1084 s->is_pcm[i + j * min_pu_width] = 2;
// Recursive transform-tree parser (HEVC spec 7.3.8.8 transform_tree syntax):
// decodes split_transform_flag and the chroma CBF flags at this depth, then
// either recurses into four quadrants (SUBDIVIDE) or decodes the leaf
// transform unit via hls_transform_unit().
// NOTE(review): this extract has elided lines (declarations of cbf_cb/cbf_cr/
// cbf_luma/ret/i/j, several closing braces, error-return paths) — the visible
// code is not the complete function body.
1087 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1088 int xBase, int yBase, int cb_xBase, int cb_yBase,
1089 int log2_cb_size, int log2_trafo_size,
1090 int trafo_depth, int blk_idx,
1091 const int *base_cbf_cb, const int *base_cbf_cr)
1093 HEVCLocalContext *lc = s->HEVClc;
1094 uint8_t split_transform_flag;
// Inherit the chroma CBF state from the parent tree level (two entries each
// to cover the second chroma block used by 4:2:2, chroma_format_idc == 2).
1099 cbf_cb[0] = base_cbf_cb[0];
1100 cbf_cb[1] = base_cbf_cb[1];
1101 cbf_cr[0] = base_cbf_cr[0];
1102 cbf_cr[1] = base_cbf_cr[1];
// For NxN intra CUs the per-block intra modes only become distinct at
// trafo_depth == 1; 4:4:4 additionally carries per-block chroma modes.
1104 if (lc->cu.intra_split_flag) {
1105 if (trafo_depth == 1) {
1106 lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1107 if (s->sps->chroma_format_idc == 3) {
1108 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1109 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[blk_idx];
1111 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1112 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0];
1116 lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[0];
1117 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1118 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0];
// split_transform_flag is only coded when a split is actually optional;
// otherwise it is inferred (forced split for oversize TBs / NxN intra at
// depth 0 / the inter_split condition below).
1121 if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1122 log2_trafo_size > s->sps->log2_min_tb_size &&
1123 trafo_depth < lc->cu.max_trafo_depth &&
1124 !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1125 split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1127 int inter_split = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1128 lc->cu.pred_mode == MODE_INTER &&
1129 lc->cu.part_mode != PART_2Nx2N &&
1132 split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1133 (lc->cu.intra_split_flag && trafo_depth == 0) ||
// Chroma CBFs: only coded when chroma TBs exist at this size (log2 > 2, or
// always for 4:4:4); the [1] entries are the extra 4:2:2 chroma blocks.
1137 if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
1138 if (trafo_depth == 0 || cbf_cb[0]) {
1139 cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1140 if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1141 cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1145 if (trafo_depth == 0 || cbf_cr[0]) {
1146 cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1147 if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1148 cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
// Recurse into the four half-size quadrants.
1153 if (split_transform_flag) {
1154 const int trafo_size_split = 1 << (log2_trafo_size - 1);
1155 const int x1 = x0 + trafo_size_split;
1156 const int y1 = y0 + trafo_size_split;
1158 #define SUBDIVIDE(x, y, idx) \
1160 ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1161 log2_trafo_size - 1, trafo_depth + 1, idx, \
1167 SUBDIVIDE(x0, y0, 0);
1168 SUBDIVIDE(x1, y0, 1);
1169 SUBDIVIDE(x0, y1, 2);
1170 SUBDIVIDE(x1, y1, 3);
// Leaf: decode cbf_luma (inferred 1 when it cannot be coded), then the TU.
1174 int min_tu_size = 1 << s->sps->log2_min_tb_size;
1175 int log2_min_tu_size = s->sps->log2_min_tb_size;
1176 int min_tu_width = s->sps->min_tb_width;
1179 if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1180 cbf_cb[0] || cbf_cr[0] ||
1181 (s->sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1182 cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1185 ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1186 log2_cb_size, log2_trafo_size, trafo_depth,
1187 blk_idx, cbf_luma, cbf_cb, cbf_cr);
1190 // TODO: store cbf_luma somewhere else
// Record cbf_luma per min-TU position for the deblocking filter.
1193 for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1194 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1195 int x_tu = (x0 + j) >> log2_min_tu_size;
1196 int y_tu = (y0 + i) >> log2_min_tu_size;
1197 s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1200 if (!s->sh.disable_deblocking_filter_flag) {
1201 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1202 if (s->pps->transquant_bypass_enable_flag &&
1203 lc->cu.cu_transquant_bypass_flag)
1204 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
// Decode a PCM (uncompressed) coding block: computes the bit length of the
// raw luma + chroma samples from the SPS PCM bit depths, skips that many
// bytes in the CABAC stream, and copies the samples straight into the frame
// via the put_pcm DSP functions.
// NOTE(review): lines are elided in this extract (opening brace, GetBitContext
// gb / int ret declarations, the error check after init_get_bits, the return).
1210 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1212 HEVCLocalContext *lc = s->HEVClc;
1214 int cb_size = 1 << log2_cb_size;
// Destination pointers into the three planes; chroma coords are scaled by
// the per-plane h/v shifts, byte offsets by pixel_shift (>8-bit formats).
1215 int stride0 = s->frame->linesize[0];
1216 uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1217 int stride1 = s->frame->linesize[1];
1218 uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1219 int stride2 = s->frame->linesize[2];
1220 uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
// Total payload in bits: luma samples at pcm.bit_depth plus both chroma
// planes at pcm.bit_depth_chroma.
1222 int length = cb_size * cb_size * s->sps->pcm.bit_depth +
1223 (((cb_size >> s->sps->hshift[1]) * (cb_size >> s->sps->vshift[1])) +
1224 ((cb_size >> s->sps->hshift[2]) * (cb_size >> s->sps->vshift[2]))) *
1225 s->sps->pcm.bit_depth_chroma;
// PCM data is byte-aligned in the bitstream; round bits up to bytes.
1226 const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1229 if (!s->sh.disable_deblocking_filter_flag)
1230 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1232 ret = init_get_bits(&gb, pcm, length);
1236 s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size, &gb, s->sps->pcm.bit_depth);
1237 s->hevcdsp.put_pcm(dst1, stride1,
1238 cb_size >> s->sps->hshift[1],
1239 cb_size >> s->sps->vshift[1],
1240 &gb, s->sps->pcm.bit_depth_chroma);
1241 s->hevcdsp.put_pcm(dst2, stride2,
1242 cb_size >> s->sps->hshift[2],
1243 cb_size >> s->sps->vshift[2],
1244 &gb, s->sps->pcm.bit_depth_chroma);
1249 * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1251 * @param s HEVC decoding context
1252 * @param dst target buffer for block data at block position
1253 * @param dststride stride of the dst buffer
1254 * @param ref reference picture buffer at origin (0, 0)
1255 * @param mv motion vector (relative to block position) to get pixel data from
1256 * @param x_off horizontal position of block from origin (0, 0)
1257 * @param y_off vertical position of block from origin (0, 0)
1258 * @param block_w width of block
1259 * @param block_h height of block
1260 * @param luma_weight weighting factor applied to the luma prediction
1261 * @param luma_offset additive offset applied to the luma prediction value
// (Doxygen contract is in the comment block above.) Unidirectional luma MC:
// integer part of the quarter-pel MV offsets the source pointer; when the
// needed area (block + QPEL filter taps) crosses the picture border,
// emulated_edge_mc replicates edge pixels into edge_emu_buffer first.
// NOTE(review): elided lines here include the declarations of mx/my (the
// fractional MV components used at the bottom) and the if/else around the
// weighted vs. unweighted qpel calls.
1264 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1265 AVFrame *ref, const Mv *mv, int x_off, int y_off,
1266 int block_w, int block_h, int luma_weight, int luma_offset)
1268 HEVCLocalContext *lc = s->HEVClc;
1269 uint8_t *src = ref->data[0];
1270 ptrdiff_t srcstride = ref->linesize[0];
1271 int pic_width = s->sps->width;
1272 int pic_height = s->sps->height;
// Explicit weighted prediction applies per slice type / PPS flag.
1275 int weight_flag = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1276 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1277 int idx = ff_hevc_pel_weight[block_w];
// MVs are in quarter-pel units: >>2 gives the integer pixel offset.
1279 x_off += mv->x >> 2;
1280 y_off += mv->y >> 2;
1281 src += y_off * srcstride + (x_off << s->sps->pixel_shift);
1283 if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1284 x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1285 y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1286 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1287 int offset = QPEL_EXTRA_BEFORE * srcstride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1288 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1290 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1291 edge_emu_stride, srcstride,
1292 block_w + QPEL_EXTRA,
1293 block_h + QPEL_EXTRA,
1294 x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1295 pic_width, pic_height);
1296 src = lc->edge_emu_buffer + buf_offset;
1297 srcstride = edge_emu_stride;
// Dispatch on block size (idx) and whether fractional filtering is needed
// in each direction (!!mx / !!my); *_uni_w is the weighted variant.
1301 s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1302 block_h, mx, my, block_w);
1304 s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1305 block_h, s->sh.luma_log2_weight_denom,
1306 luma_weight, luma_offset, mx, my, block_w);
1310 * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1312 * @param s HEVC decoding context
1313 * @param dst target buffer for block data at block position
1314 * @param dststride stride of the dst buffer
1315 * @param ref0 reference picture0 buffer at origin (0, 0)
1316 * @param mv0 motion vector0 (relative to block position) to get pixel data from
1317 * @param x_off horizontal position of block from origin (0, 0)
1318 * @param y_off vertical position of block from origin (0, 0)
1319 * @param block_w width of block
1320 * @param block_h height of block
1321 * @param ref1 reference picture1 buffer at origin (0, 0)
1322 * @param mv1 motion vector1 (relative to block position) to get pixel data from
1323 * @param current_mv current motion vector structure
// (Doxygen contract is in the comment block above.) Bidirectional luma MC:
// the L0 prediction is filtered into the intermediate buffer lc->tmp, then
// the *_bi (or weighted *_bi_w) kernel combines it with the L1 prediction
// and writes the averaged result to dst. Each reference gets its own border
// check + edge emulation (note: two separate emu buffers, since both
// predictions must be live at the combine step).
// NOTE(review): lines elided in this extract include the if/else around the
// weighted vs. unweighted combine and several closing braces.
1325 static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1326 AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1327 int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1329 HEVCLocalContext *lc = s->HEVClc;
1330 ptrdiff_t src0stride = ref0->linesize[0];
1331 ptrdiff_t src1stride = ref1->linesize[0];
1332 int pic_width = s->sps->width;
1333 int pic_height = s->sps->height;
// Fractional quarter-pel parts of each MV.
1334 int mx0 = mv0->x & 3;
1335 int my0 = mv0->y & 3;
1336 int mx1 = mv1->x & 3;
1337 int my1 = mv1->y & 3;
1338 int weight_flag = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1339 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
// Integer-pel positions of each prediction block.
1340 int x_off0 = x_off + (mv0->x >> 2);
1341 int y_off0 = y_off + (mv0->y >> 2);
1342 int x_off1 = x_off + (mv1->x >> 2);
1343 int y_off1 = y_off + (mv1->y >> 2);
1344 int idx = ff_hevc_pel_weight[block_w];
// The (unsigned) cast makes the left-shift well defined for negative
// offsets (avoids signed-shift UB); the product is truncated back to int.
1346 uint8_t *src0 = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
1347 uint8_t *src1 = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
1349 if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1350 x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1351 y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1352 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1353 int offset = QPEL_EXTRA_BEFORE * src0stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1354 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1356 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1357 edge_emu_stride, src0stride,
1358 block_w + QPEL_EXTRA,
1359 block_h + QPEL_EXTRA,
1360 x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1361 pic_width, pic_height);
1362 src0 = lc->edge_emu_buffer + buf_offset;
1363 src0stride = edge_emu_stride;
1366 if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1367 x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1368 y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1369 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1370 int offset = QPEL_EXTRA_BEFORE * src1stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1371 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1373 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1374 edge_emu_stride, src1stride,
1375 block_w + QPEL_EXTRA,
1376 block_h + QPEL_EXTRA,
1377 x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1378 pic_width, pic_height);
1379 src1 = lc->edge_emu_buffer2 + buf_offset;
1380 src1stride = edge_emu_stride;
// Pass 1: L0 prediction into the 16-bit intermediate buffer.
1383 s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
1384 block_h, mx0, my0, block_w);
// Pass 2: combine with L1 — plain average or explicit bi-weighted.
1386 s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1387 block_h, mx1, my1, block_w);
1389 s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1390 block_h, s->sh.luma_log2_weight_denom,
1391 s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1392 s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1393 s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1394 s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1400 * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1402 * @param s HEVC decoding context
1403 * @param dst1 target buffer for block data at block position (U plane)
1404 * @param dst2 target buffer for block data at block position (V plane)
1405 * @param dststride stride of the dst1 and dst2 buffers
1406 * @param ref reference picture buffer at origin (0, 0)
1407 * @param mv motion vector (relative to block position) to get pixel data from
1408 * @param x_off horizontal position of block from origin (0, 0)
1409 * @param y_off vertical position of block from origin (0, 0)
1410 * @param block_w width of block
1411 * @param block_h height of block
1412 * @param chroma_weight weighting factor applied to the chroma prediction
1413 * @param chroma_offset additive offset applied to the chroma prediction value
// (Doxygen contract is in the comment block above.) Unidirectional chroma MC
// for one plane: chroma MVs have 1/8-pel precision at 4:2:0 (2 + hshift
// fractional bits per axis), so the fractional part is rescaled by
// (1 - h/vshift) before being handed to the EPEL kernels.
// NOTE(review): "¤t_mv" below is mojibake — the HTML entity &curren;
// swallowed the "&curren" prefix of "&current_mv". Restore "&current_mv->..."
// when repairing this extract. Elided lines also hide the if/else around the
// weighted vs. unweighted epel calls.
1416 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1417 ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1418 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1420 HEVCLocalContext *lc = s->HEVClc;
// Picture dimensions in chroma samples.
1421 int pic_width = s->sps->width >> s->sps->hshift[1];
1422 int pic_height = s->sps->height >> s->sps->vshift[1];
1423 const Mv *mv = ¤t_mv->mv[reflist];
1424 int weight_flag = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1425 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1426 int idx = ff_hevc_pel_weight[block_w];
1427 int hshift = s->sps->hshift[1];
1428 int vshift = s->sps->vshift[1];
// Fractional MV part in chroma units, then normalized to 1/8-pel for the DSP.
1429 intptr_t mx = mv->x & ((1 << (2 + hshift)) - 1);
1430 intptr_t my = mv->y & ((1 << (2 + vshift)) - 1);
1431 intptr_t _mx = mx << (1 - hshift);
1432 intptr_t _my = my << (1 - vshift);
1434 x_off += mv->x >> (2 + hshift);
1435 y_off += mv->y >> (2 + vshift);
1436 src0 += y_off * srcstride + (x_off << s->sps->pixel_shift);
// Border case: replicate edge pixels so the 4-tap EPEL filter never reads
// outside the reference plane.
1438 if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1439 x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1440 y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1441 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1442 int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->sps->pixel_shift));
1443 int buf_offset0 = EPEL_EXTRA_BEFORE *
1444 (edge_emu_stride + (1 << s->sps->pixel_shift));
1445 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1446 edge_emu_stride, srcstride,
1447 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1448 x_off - EPEL_EXTRA_BEFORE,
1449 y_off - EPEL_EXTRA_BEFORE,
1450 pic_width, pic_height);
1452 src0 = lc->edge_emu_buffer + buf_offset0;
1453 srcstride = edge_emu_stride;
1456 s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1457 block_h, _mx, _my, block_w);
1459 s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1460 block_h, s->sh.chroma_log2_weight_denom,
1461 chroma_weight, chroma_offset, _mx, _my, block_w);
1465 * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1467 * @param s HEVC decoding context
1468 * @param dst target buffer for block data at block position
1469 * @param dststride stride of the dst buffer
1470 * @param ref0 reference picture0 buffer at origin (0, 0)
1471 * @param mv0 motion vector0 (relative to block position) to get pixel data from
1472 * @param x_off horizontal position of block from origin (0, 0)
1473 * @param y_off vertical position of block from origin (0, 0)
1474 * @param block_w width of block
1475 * @param block_h height of block
1476 * @param ref1 reference picture1 buffer at origin (0, 0)
1477 * @param mv1 motion vector1 (relative to block position) to get pixel data from
1478 * @param current_mv current motion vector structure
1479 * @param cidx chroma component(cb, cr)
// (Doxygen contract is in the comment block above.) Bidirectional chroma MC
// for one component (cidx 0 = Cb, 1 = Cr): L0 prediction goes into lc->tmp,
// then the *_bi (or weighted *_bi_w) EPEL kernel combines it with L1.
// Mirrors luma_mc_bi but with 4-tap EPEL filters and 1/8-pel chroma MVs.
// NOTE(review): "¤t_mv" is mojibake for "&current_mv" (HTML entity
// &curren;). Elided lines hide the weighted/unweighted if/else and braces.
1481 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1482 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1484 HEVCLocalContext *lc = s->HEVClc;
// data[cidx+1]: plane 1 is Cb, plane 2 is Cr.
1485 uint8_t *src1 = ref0->data[cidx+1];
1486 uint8_t *src2 = ref1->data[cidx+1];
1487 ptrdiff_t src1stride = ref0->linesize[cidx+1];
1488 ptrdiff_t src2stride = ref1->linesize[cidx+1];
1489 int weight_flag = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1490 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1491 int pic_width = s->sps->width >> s->sps->hshift[1];
1492 int pic_height = s->sps->height >> s->sps->vshift[1];
1493 Mv *mv0 = ¤t_mv->mv[0];
1494 Mv *mv1 = ¤t_mv->mv[1];
1495 int hshift = s->sps->hshift[1];
1496 int vshift = s->sps->vshift[1];
// Fractional chroma MV parts, normalized to 1/8-pel for the EPEL kernels.
1498 intptr_t mx0 = mv0->x & ((1 << (2 + hshift)) - 1);
1499 intptr_t my0 = mv0->y & ((1 << (2 + vshift)) - 1);
1500 intptr_t mx1 = mv1->x & ((1 << (2 + hshift)) - 1);
1501 intptr_t my1 = mv1->y & ((1 << (2 + vshift)) - 1);
1502 intptr_t _mx0 = mx0 << (1 - hshift);
1503 intptr_t _my0 = my0 << (1 - vshift);
1504 intptr_t _mx1 = mx1 << (1 - hshift);
1505 intptr_t _my1 = my1 << (1 - vshift);
1507 int x_off0 = x_off + (mv0->x >> (2 + hshift));
1508 int y_off0 = y_off + (mv0->y >> (2 + vshift));
1509 int x_off1 = x_off + (mv1->x >> (2 + hshift));
1510 int y_off1 = y_off + (mv1->y >> (2 + vshift));
1511 int idx = ff_hevc_pel_weight[block_w];
// (unsigned) cast avoids signed-shift UB for negative offsets.
1512 src1 += y_off0 * src1stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
1513 src2 += y_off1 * src2stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
// Edge emulation, one buffer per reference (both stay live for the combine).
1515 if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1516 x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1517 y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1518 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1519 int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1520 int buf_offset1 = EPEL_EXTRA_BEFORE *
1521 (edge_emu_stride + (1 << s->sps->pixel_shift));
1523 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1524 edge_emu_stride, src1stride,
1525 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1526 x_off0 - EPEL_EXTRA_BEFORE,
1527 y_off0 - EPEL_EXTRA_BEFORE,
1528 pic_width, pic_height);
1530 src1 = lc->edge_emu_buffer + buf_offset1;
1531 src1stride = edge_emu_stride;
1534 if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1535 x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1536 y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1537 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1538 int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1539 int buf_offset1 = EPEL_EXTRA_BEFORE *
1540 (edge_emu_stride + (1 << s->sps->pixel_shift));
1542 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1543 edge_emu_stride, src2stride,
1544 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1545 x_off1 - EPEL_EXTRA_BEFORE,
1546 y_off1 - EPEL_EXTRA_BEFORE,
1547 pic_width, pic_height);
1549 src2 = lc->edge_emu_buffer2 + buf_offset1;
1550 src2stride = edge_emu_stride;
// Pass 1: L0 into 16-bit intermediate; pass 2: combine with L1.
1553 s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1554 block_h, _mx0, _my0, block_w);
1556 s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1557 src2, src2stride, lc->tmp,
1558 block_h, _mx1, _my1, block_w);
1560 s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1561 src2, src2stride, lc->tmp,
1563 s->sh.chroma_log2_weight_denom,
1564 s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1565 s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1566 s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1567 s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1568 _mx1, _my1, block_w);
// Frame-threading synchronization: before sampling a reference frame, block
// until its decoder thread has reported progress past the lowest row this
// prediction may read (block bottom plus the MV's integer vertical offset).
// NOTE(review): the +9 margin presumably covers the QPEL interpolation
// extension below the block — confirm against QPEL_EXTRA_AFTER.
1571 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1572 const Mv *mv, int y0, int height)
1574 int y = (mv->y >> 2) + y0 + height + 9;
1576 if (s->threads_type == FF_THREAD_FRAME )
1577 ff_thread_await_progress(&ref->tf, y, 0);
// Decode and execute one prediction unit: parse merge / AMVP motion data,
// write the resulting MvField into tab_mvf for every covered min-PU, wait on
// reference-frame progress (frame threading), then run luma + chroma motion
// compensation for the L0 / L1 / BI case.
// NOTE(review): "¤t_mv" throughout is mojibake for "&current_mv"
// (HTML entity &curren;). This extract also elides many lines: the nPbW/nPbH
// parameters, declarations of merge_idx/ref_idx/mvp_flag/x_pu/y_pu/i/j,
// several else-branches and closing braces.
1580 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1582 int log2_cb_size, int partIdx, int idx)
// POS: address of sample (x, y) in plane c_idx, honoring chroma subsampling
// and bytes-per-sample (pixel_shift).
1584 #define POS(c_idx, x, y) \
1585 &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1586 (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1587 HEVCLocalContext *lc = s->HEVClc;
1589 struct MvField current_mv = {{{ 0 }}};
1591 int min_pu_width = s->sps->min_pu_width;
1593 MvField *tab_mvf = s->ref->tab_mvf;
1594 RefPicList *refPicList = s->ref->refPicList;
1595 HEVCFrame *ref0, *ref1;
1596 uint8_t *dst0 = POS(0, x0, y0);
1597 uint8_t *dst1 = POS(1, x0, y0);
1598 uint8_t *dst2 = POS(2, x0, y0);
1599 int log2_min_cb_size = s->sps->log2_min_cb_size;
1600 int min_cb_width = s->sps->min_cb_width;
1601 int x_cb = x0 >> log2_min_cb_size;
1602 int y_cb = y0 >> log2_min_cb_size;
// Skip mode: motion comes entirely from the merge candidate list.
1608 if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1609 if (s->sh.max_num_merge_cand > 1)
1610 merge_idx = ff_hevc_merge_idx_decode(s);
1614 ff_hevc_luma_mv_merge_mode(s, x0, y0,
1617 log2_cb_size, partIdx,
1618 merge_idx, ¤t_mv);
1619 x_pu = x0 >> s->sps->log2_min_pu_size;
1620 y_pu = y0 >> s->sps->log2_min_pu_size;
// Propagate the motion field to every min-PU covered by this PU.
1622 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1623 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1624 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1625 } else { /* MODE_INTER */
1626 lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1627 if (lc->pu.merge_flag) {
1628 if (s->sh.max_num_merge_cand > 1)
1629 merge_idx = ff_hevc_merge_idx_decode(s);
1633 ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1634 partIdx, merge_idx, ¤t_mv);
1635 x_pu = x0 >> s->sps->log2_min_pu_size;
1636 y_pu = y0 >> s->sps->log2_min_pu_size;
1638 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1639 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1640 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
// AMVP path: explicit inter_pred_idc, ref indices, MVDs and MVP flags.
1642 enum InterPredIdc inter_pred_idc = PRED_L0;
1643 ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1644 current_mv.pred_flag = 0;
1645 if (s->sh.slice_type == B_SLICE)
1646 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1648 if (inter_pred_idc != PRED_L1) {
1649 if (s->sh.nb_refs[L0]) {
1650 ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1651 current_mv.ref_idx[0] = ref_idx[0];
1653 current_mv.pred_flag = PF_L0;
1654 ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1655 mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
1656 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1657 partIdx, merge_idx, ¤t_mv,
// Final MV = predictor + decoded MVD.
1659 current_mv.mv[0].x += lc->pu.mvd.x;
1660 current_mv.mv[0].y += lc->pu.mvd.y;
1663 if (inter_pred_idc != PRED_L0) {
1664 if (s->sh.nb_refs[L1]) {
1665 ref_idx[1] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1666 current_mv.ref_idx[1] = ref_idx[1];
// mvd_l1_zero_flag: BI PUs carry no L1 MVD, it is inferred zero.
1669 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1670 AV_ZERO32(&lc->pu.mvd);
1672 ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1675 current_mv.pred_flag += PF_L1;
1676 mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
1677 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1678 partIdx, merge_idx, ¤t_mv,
1680 current_mv.mv[1].x += lc->pu.mvd.x;
1681 current_mv.mv[1].y += lc->pu.mvd.y;
1684 x_pu = x0 >> s->sps->log2_min_pu_size;
1685 y_pu = y0 >> s->sps->log2_min_pu_size;
1687 for(j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1688 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1689 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
// Resolve reference frames and wait until their rows are decoded
// (no-op outside frame threading).
1693 if (current_mv.pred_flag & PF_L0) {
1694 ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1697 hevc_await_progress(s, ref0, ¤t_mv.mv[0], y0, nPbH);
1699 if (current_mv.pred_flag & PF_L1) {
1700 ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1703 hevc_await_progress(s, ref1, ¤t_mv.mv[1], y0, nPbH);
// Motion compensation: uni-L0, uni-L1 or bi-prediction.
1706 if (current_mv.pred_flag == PF_L0) {
1707 int x0_c = x0 >> s->sps->hshift[1];
1708 int y0_c = y0 >> s->sps->vshift[1];
1709 int nPbW_c = nPbW >> s->sps->hshift[1];
1710 int nPbH_c = nPbH >> s->sps->vshift[1];
1712 luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1713 ¤t_mv.mv[0], x0, y0, nPbW, nPbH,
1714 s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1715 s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1717 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1718 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1719 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1720 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1721 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1722 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1723 } else if (current_mv.pred_flag == PF_L1) {
1724 int x0_c = x0 >> s->sps->hshift[1];
1725 int y0_c = y0 >> s->sps->vshift[1];
1726 int nPbW_c = nPbW >> s->sps->hshift[1];
1727 int nPbH_c = nPbH >> s->sps->vshift[1];
1729 luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1730 ¤t_mv.mv[1], x0, y0, nPbW, nPbH,
1731 s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1732 s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1734 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1735 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1736 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1738 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1739 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1740 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1741 } else if (current_mv.pred_flag == PF_BI) {
1742 int x0_c = x0 >> s->sps->hshift[1];
1743 int y0_c = y0 >> s->sps->vshift[1];
1744 int nPbW_c = nPbW >> s->sps->hshift[1];
1745 int nPbH_c = nPbH >> s->sps->vshift[1];
1747 luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1748 ¤t_mv.mv[0], x0, y0, nPbW, nPbH,
1749 ref1->frame, ¤t_mv.mv[1], ¤t_mv);
1751 chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1752 x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 0);
1754 chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1755 x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 1);
// Derive the luma intra prediction mode for a PU (HEVC spec 8.4.2): build the
// three most-probable-mode candidates from the left/up neighbours, select via
// mpm_idx or reconstruct from rem_intra_luma_pred_mode, then write the chosen
// mode and PF_INTRA into the per-min-PU tables.
// NOTE(review): elided lines include the candidate[3] declaration, the
// cand_up reset when crossing the CTB top boundary, the intra_pred_mode++
// inside the rem loop, and closing braces.
1762 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1763 int prev_intra_luma_pred_flag)
1765 HEVCLocalContext *lc = s->HEVClc;
1766 int x_pu = x0 >> s->sps->log2_min_pu_size;
1767 int y_pu = y0 >> s->sps->log2_min_pu_size;
1768 int min_pu_width = s->sps->min_pu_width;
1769 int size_in_pus = pu_size >> s->sps->log2_min_pu_size;
// Position within the current CTB, for neighbour-availability checks.
1770 int x0b = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1771 int y0b = y0 & ((1 << s->sps->log2_ctb_size) - 1);
// Neighbour modes default to INTRA_DC when the neighbour is unavailable.
1773 int cand_up = (lc->ctb_up_flag || y0b) ?
1774 s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1775 int cand_left = (lc->ctb_left_flag || x0b) ?
1776 s->tab_ipm[y_pu * min_pu_width + x_pu - 1] : INTRA_DC;
1778 int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1780 MvField *tab_mvf = s->ref->tab_mvf;
1781 int intra_pred_mode;
1785 // intra_pred_mode prediction does not cross vertical CTB boundaries
1786 if ((y0 - 1) < y_ctb)
// MPM candidate list construction per spec 8.4.2.
1789 if (cand_left == cand_up) {
1790 if (cand_left < 2) {
1791 candidate[0] = INTRA_PLANAR;
1792 candidate[1] = INTRA_DC;
1793 candidate[2] = INTRA_ANGULAR_26;
// Equal angular neighbours: the two adjacent angular modes, wrapping
// within the 32 angular modes (2..33).
1795 candidate[0] = cand_left;
1796 candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1797 candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1800 candidate[0] = cand_left;
1801 candidate[1] = cand_up;
1802 if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1803 candidate[2] = INTRA_PLANAR;
1804 } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1805 candidate[2] = INTRA_DC;
1807 candidate[2] = INTRA_ANGULAR_26;
1811 if (prev_intra_luma_pred_flag) {
1812 intra_pred_mode = candidate[lc->pu.mpm_idx];
// Non-MPM path: sort candidates ascending, then remap the remaining-mode
// index past each candidate that is <= it.
1814 if (candidate[0] > candidate[1])
1815 FFSWAP(uint8_t, candidate[0], candidate[1]);
1816 if (candidate[0] > candidate[2])
1817 FFSWAP(uint8_t, candidate[0], candidate[2]);
1818 if (candidate[1] > candidate[2])
1819 FFSWAP(uint8_t, candidate[1], candidate[2]);
1821 intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1822 for (i = 0; i < 3; i++)
1823 if (intra_pred_mode >= candidate[i])
1827 /* write the intra prediction units into the mv array */
1830 for (i = 0; i < size_in_pus; i++) {
1831 memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1832 intra_pred_mode, size_in_pus);
1834 for (j = 0; j < size_in_pus; j++) {
1835 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
1839 return intra_pred_mode;
// Record the coding-tree depth of the CU at (x0, y0) into tab_ct_depth for
// every min-CB it covers (used later for CABAC context derivation of
// split_cu_flag in neighbouring CUs).
// NOTE(review): the memset call below is truncated by elision — the original
// continues with the ct_depth value and length arguments.
1842 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1843 int log2_cb_size, int ct_depth)
1845 int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1846 int x_cb = x0 >> s->sps->log2_min_cb_size;
1847 int y_cb = y0 >> s->sps->log2_min_cb_size;
1850 for (y = 0; y < length; y++)
1851 memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
// Chroma intra mode remap table for 4:2:2 (chroma_format_idc == 2): maps a
// derived luma-domain intra mode index to the mode actually used for the
// half-width chroma blocks (see its use in intra_prediction_unit below).
1855 static const uint8_t tab_mode_idx[] = {
1856 0, 1, 2, 2, 2, 2, 3, 5, 7, 8, 10, 12, 13, 15, 17, 18, 19, 20,
1857 21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31};
// Parse intra prediction modes for a CU: one luma PU (2Nx2N) or four (NxN,
// side == 2), then the chroma mode(s). Chroma mode 4 means "derived from
// luma" (DM); otherwise the mode comes from intra_chroma_table, with the
// special-case substitution of 34 when it would collide with the luma mode.
// NOTE(review): elided lines include the log2_cb_size parameter line,
// declarations of chroma_mode/mode_idx/i/j, else-branches and braces.
1859 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1862 HEVCLocalContext *lc = s->HEVClc;
// Candidate chroma modes indexed by intra_chroma_pred_mode: planar(0),
// vertical(26), horizontal(10), DC(1).
1863 static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1864 uint8_t prev_intra_luma_pred_flag[4];
1865 int split = lc->cu.part_mode == PART_NxN;
1866 int pb_size = (1 << log2_cb_size) >> split;
1867 int side = split + 1;
// First pass: all prev_intra_luma_pred_flag bits (they are coded together
// before any mpm_idx / rem_intra_luma_pred_mode).
1871 for (i = 0; i < side; i++)
1872 for (j = 0; j < side; j++)
1873 prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1875 for (i = 0; i < side; i++) {
1876 for (j = 0; j < side; j++) {
1877 if (prev_intra_luma_pred_flag[2 * i + j])
1878 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1880 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1882 lc->pu.intra_pred_mode[2 * i + j] =
1883 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1884 prev_intra_luma_pred_flag[2 * i + j]);
// 4:4:4: an independent chroma mode per luma PU.
1888 if (s->sps->chroma_format_idc == 3) {
1889 for (i = 0; i < side; i++) {
1890 for (j = 0; j < side; j++) {
1891 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1892 if (chroma_mode != 4) {
1893 if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
1894 lc->pu.intra_pred_mode_c[2 * i + j] = 34;
1896 lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
1898 lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
// 4:2:2: a single chroma mode, remapped through tab_mode_idx.
1902 } else if (s->sps->chroma_format_idc == 2) {
1904 lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1905 if (chroma_mode != 4) {
1906 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1909 mode_idx = intra_chroma_table[chroma_mode];
1911 mode_idx = lc->pu.intra_pred_mode[0];
1913 lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
// 4:2:0 (and any non-monochrome format): single chroma mode, no remap.
1914 } else if (s->sps->chroma_format_idc != 0) {
1915 chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1916 if (chroma_mode != 4) {
1917 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1918 lc->pu.intra_pred_mode_c[0] = 34;
1920 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
1922 lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
/* Fill default intra-prediction state for the CU at (x0, y0):
 * mark every minimal PU covered by the CU as INTRA_DC in tab_ipm, and,
 * if the CU is intra-coded, tag the corresponding MvFields as PF_INTRA
 * so later inter prediction/deblocking sees them as intra. */
1927 static void intra_prediction_unit_default_value(HEVCContext *s,
1931 HEVCLocalContext *lc = s->HEVClc;
1932 int pb_size = 1 << log2_cb_size;
1933 int size_in_pus = pb_size >> s->sps->log2_min_pu_size;
1934 int min_pu_width = s->sps->min_pu_width;
1935 MvField *tab_mvf = s->ref->tab_mvf;
1936 int x_pu = x0 >> s->sps->log2_min_pu_size;
1937 int y_pu = y0 >> s->sps->log2_min_pu_size;
    /* CU smaller than the minimal PU grid: nothing to mark. */
1940 if (size_in_pus == 0)
1942 for (j = 0; j < size_in_pus; j++)
1943 memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
1944 if (lc->cu.pred_mode == MODE_INTRA)
1945 for (j = 0; j < size_in_pus; j++)
1946 for (k = 0; k < size_in_pus; k++)
1947 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
/* Decode one coding unit at (x0, y0) of size 2^log2_cb_size:
 * skip flag, prediction mode, partition mode, PCM/intra/inter payload,
 * the residual transform tree, and finally QP bookkeeping and CT-depth
 * propagation.  Returns 0 on success, a negative error code otherwise. */
1950 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
1952 int cb_size = 1 << log2_cb_size;
1953 HEVCLocalContext *lc = s->HEVClc;
1954 int log2_min_cb_size = s->sps->log2_min_cb_size;
    /* CU extent expressed in minimal-CB units (rows/cols to fill below). */
1955 int length = cb_size >> log2_min_cb_size;
1956 int min_cb_width = s->sps->min_cb_width;
1957 int x_cb = x0 >> log2_min_cb_size;
1958 int y_cb = y0 >> log2_min_cb_size;
1959 int idx = log2_cb_size - 2;
    /* Mask used to detect QP-group boundaries for delta-QP prediction. */
1960 int qp_block_mask = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
    /* Reset per-CU state before parsing. */
1965 lc->cu.rqt_root_cbf = 1;
1966 lc->cu.pred_mode = MODE_INTRA;
1967 lc->cu.part_mode = PART_2Nx2N;
1968 lc->cu.intra_split_flag = 0;
1969 lc->cu.pcm_flag = 0;
1971 SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
1972 for (x = 0; x < 4; x++)
1973 lc->pu.intra_pred_mode[x] = 1;
1974 if (s->pps->transquant_bypass_enable_flag) {
1975 lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
1976 if (lc->cu.cu_transquant_bypass_flag)
1977 set_deblocking_bypass(s, x0, y0, log2_cb_size);
1979 lc->cu.cu_transquant_bypass_flag = 0;
    /* Non-I slices carry a skip flag; propagate it over the CU's area. */
1981 if (s->sh.slice_type != I_SLICE) {
1982 uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
1984 x = y_cb * min_cb_width + x_cb;
1985 for (y = 0; y < length; y++) {
1986 memset(&s->skip_flag[x], skip_flag, length);
1989 lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
1991 x = y_cb * min_cb_width + x_cb;
1992 for (y = 0; y < length; y++) {
1993 memset(&s->skip_flag[x], 0, length);
    /* Skipped CU: one merge PU covering the whole CU, no residual. */
1998 if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
1999 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2000 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2002 if (!s->sh.disable_deblocking_filter_flag)
2003 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2005 if (s->sh.slice_type != I_SLICE)
2006 lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
    /* part_mode is only signalled for inter CUs or minimum-size CUs. */
2007 if (lc->cu.pred_mode != MODE_INTRA ||
2008 log2_cb_size == s->sps->log2_min_cb_size) {
2009 lc->cu.part_mode = ff_hevc_part_mode_decode(s, log2_cb_size);
2010 lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2011 lc->cu.pred_mode == MODE_INTRA;
2014 if (lc->cu.pred_mode == MODE_INTRA) {
    /* PCM is only allowed for 2Nx2N intra CUs within the SPS size range. */
2015 if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2016 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2017 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2018 lc->cu.pcm_flag = ff_hevc_pcm_flag_decode(s);
2020 if (lc->cu.pcm_flag) {
2021 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2022 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2023 if (s->sps->pcm.loop_filter_disable_flag)
2024 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2029 intra_prediction_unit(s, x0, y0, log2_cb_size);
2032 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
    /* Inter CU: decode one PU per partition of the chosen part_mode.
     * idx adjustments select the weight-table entry for the PU width. */
2033 switch (lc->cu.part_mode) {
2035 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2038 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 2, log2_cb_size, 0, idx);
2039 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2042 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2043 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2046 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 4, log2_cb_size, 0, idx);
2047 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2050 hls_prediction_unit(s, x0, y0, cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2051 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size / 4, log2_cb_size, 1, idx);
2054 hls_prediction_unit(s, x0, y0, cb_size / 4, cb_size, log2_cb_size, 0, idx - 2);
2055 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2058 hls_prediction_unit(s, x0, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2059 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size / 4, cb_size, log2_cb_size, 1, idx - 2);
2062 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2063 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2064 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2065 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
    /* Residual: rqt_root_cbf gates the transform tree for non-merge inter. */
2070 if (!lc->cu.pcm_flag) {
2071 if (lc->cu.pred_mode != MODE_INTRA &&
2072 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2073 lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2075 if (lc->cu.rqt_root_cbf) {
2076 const static int cbf[2] = { 0 };
2077 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2078 s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2079 s->sps->max_transform_hierarchy_depth_inter;
2080 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2082 log2_cb_size, 0, 0, cbf, cbf);
2086 if (!s->sh.disable_deblocking_filter_flag)
2087 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
    /* If no delta-QP was coded for this QP group, derive qPy now. */
2092 if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2093 ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
2095 x = y_cb * min_cb_width + x_cb;
2096 for (y = 0; y < length; y++) {
2097 memset(&s->qp_y_tab[x], lc->qp_y, length);
    /* At the end of a QP group, the current qp_y becomes the predictor. */
2101 if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2102 ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2103 lc->qPy_pred = lc->qp_y;
2106 set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
/* Recursively decode the coding quadtree rooted at (x0, y0).
 * Splits into four children while split_cu_flag is set (forced when the
 * CU does not fit inside the picture), otherwise decodes a leaf CU.
 * Returns >0 if more CTB data follows, 0 at end of slice, <0 on error. */
2111 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2112 int log2_cb_size, int cb_depth)
2114 HEVCLocalContext *lc = s->HEVClc;
2115 const int cb_size = 1 << log2_cb_size;
2117 int qp_block_mask = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
2120 lc->ct_depth = cb_depth;
    /* split_cu_flag is only coded when the CU fits entirely in the picture
     * and can still be split; otherwise the split is implied. */
2121 if (x0 + cb_size <= s->sps->width &&
2122 y0 + cb_size <= s->sps->height &&
2123 log2_cb_size > s->sps->log2_min_cb_size) {
2124 split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2126 split_cu = (log2_cb_size > s->sps->log2_min_cb_size);
    /* Reset delta-QP state at the start of each QP group. */
2128 if (s->pps->cu_qp_delta_enabled_flag &&
2129 log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2130 lc->tu.is_cu_qp_delta_coded = 0;
2131 lc->tu.cu_qp_delta = 0;
2134 if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2135 log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_chroma_qp_offset_depth) {
2136 lc->tu.is_cu_chroma_qp_offset_coded = 0;
    /* Recurse into the four children; children outside the picture are
     * skipped (they do not exist in the bitstream). */
2140 const int cb_size_split = cb_size >> 1;
2141 const int x1 = x0 + cb_size_split;
2142 const int y1 = y0 + cb_size_split;
2146 more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2150 if (more_data && x1 < s->sps->width) {
2151 more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2155 if (more_data && y1 < s->sps->height) {
2156 more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2160 if (more_data && x1 < s->sps->width &&
2161 y1 < s->sps->height) {
2162 more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
2167 if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2168 ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2169 lc->qPy_pred = lc->qp_y;
    /* More data follows as long as part of this node is inside the frame. */
2172 return ((x1 + cb_size_split) < s->sps->width ||
2173 (y1 + cb_size_split) < s->sps->height);
    /* Leaf: decode the CU, then check end_of_slice at CTB boundaries. */
2177 ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2180 if ((!((x0 + cb_size) %
2181 (1 << (s->sps->log2_ctb_size))) ||
2182 (x0 + cb_size >= s->sps->width)) &&
2184 (1 << (s->sps->log2_ctb_size))) ||
2185 (y0 + cb_size >= s->sps->height))) {
2186 int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2187 return !end_of_slice_flag;
/* Set up neighbourhood state for the CTB at (x_ctb, y_ctb): record the
 * owning slice in tab_slice_address, compute the tile-row extents
 * (end_of_tiles_x/y), derive slice/tile boundary flags, and compute the
 * availability of the left/up/up-right/up-left neighbouring CTBs used by
 * CABAC context derivation and prediction. */
2196 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2199 HEVCLocalContext *lc = s->HEVClc;
2200 int ctb_size = 1 << s->sps->log2_ctb_size;
2201 int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
    /* May be negative for dependent slice segments that start before the
     * independent segment's slice_addr. */
2202 int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2204 s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2206 if (s->pps->entropy_coding_sync_enabled_flag) {
    /* WPP: each CTB row restarts QP-group prediction. */
2207 if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2208 lc->first_qp_group = 1;
2209 lc->end_of_tiles_x = s->sps->width;
2210 } else if (s->pps->tiles_enabled_flag) {
2211 if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2212 int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2213 lc->end_of_tiles_x = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2214 lc->first_qp_group = 1;
2217 lc->end_of_tiles_x = s->sps->width;
2220 lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2222 lc->boundary_flags = 0;
2223 if (s->pps->tiles_enabled_flag) {
2224 if (x_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2225 lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2226 if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2227 lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2228 if (y_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]])
2229 lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2230 if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width])
2231 lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
    /* Fixed: the original "!ctb_addr_in_slice > 0" parses as
     * "(!ctb_addr_in_slice) > 0", i.e. ctb_addr_in_slice == 0, which
     * misses negative addresses occurring in dependent slice segments.
     * The intended test is "at or before the slice start". */
2233 if (ctb_addr_in_slice <= 0)
2234 lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2235 if (ctb_addr_in_slice < s->sps->ctb_width)
2236 lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
    /* Neighbour availability: inside the picture, inside the slice, and
     * not across a tile boundary. */
2239 lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2240 lc->ctb_up_flag = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2241 lc->ctb_up_right_flag = ((y_ctb > 0) && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2242 lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0) && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
/* Single-threaded slice decoding entry point (AVCodecContext.execute
 * callback): walks the CTBs of the current slice in tile-scan order,
 * initialising CABAC and decoding each CTB's quadtree, then running the
 * in-loop filters. */
2245 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2247 HEVCContext *s = avctxt->priv_data;
2248 int ctb_size = 1 << s->sps->log2_ctb_size;
2252 int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
    /* A dependent slice segment cannot be the very first tile-scan CTB. */
2254 if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2255 av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2256 return AVERROR_INVALIDDATA;
    /* Dependent segments require the preceding segment to be decoded. */
2259 if (s->sh.dependent_slice_segment_flag) {
2260 int prev_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2261 if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2262 av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2263 return AVERROR_INVALIDDATA;
2267 while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2268 int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2270 x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2271 y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2272 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2274 ff_hevc_cabac_init(s, ctb_addr_ts);
2276 hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2278 s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2279 s->deblock[ctb_addr_rs].tc_offset = s->sh.tc_offset;
2280 s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag;
2282 more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2283 if (more_data < 0) {
    /* Mark the CTB as not belonging to any slice so later passes skip it. */
2284 s->tab_slice_address[ctb_addr_rs] = -1;
2290 ff_hevc_save_states(s, ctb_addr_ts);
2291 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
    /* Filter the final CTB once the bottom-right corner is reached. */
2294 if (x_ctb + ctb_size >= s->sps->width &&
2295 y_ctb + ctb_size >= s->sps->height)
2296 ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
/* Run hls_decode_entry for the current slice via the (single-job)
 * avctx->execute mechanism. */
2301 static int hls_slice_data(HEVCContext *s)
2309 s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
/* Wavefront (WPP) per-row decoding entry point (execute2 callback):
 * decodes one CTB row using a per-thread HEVCContext clone, synchronising
 * with the row above via ff_thread_await/report_progress2 and aborting
 * all rows through the shared wpp_err flag on error. */
2312 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2314 HEVCContext *s1 = avctxt->priv_data, *s;
2315 HEVCLocalContext *lc;
2316 int ctb_size = 1<< s1->sps->log2_ctb_size;
2318 int *ctb_row_p = input_ctb_row;
2319 int ctb_row = ctb_row_p[job];
2320 int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->sps->width + ctb_size - 1) >> s1->sps->log2_ctb_size);
2321 int ctb_addr_ts = s1->pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2322 int thread = ctb_row % s1->threads_number;
2325 s = s1->sList[self_id];
    /* Rows after the first start at their recorded entry-point offset. */
2329 ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2333 ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2336 while(more_data && ctb_addr_ts < s->sps->ctb_size) {
2337 int x_ctb = (ctb_addr_rs % s->sps->ctb_width) << s->sps->log2_ctb_size;
2338 int y_ctb = (ctb_addr_rs / s->sps->ctb_width) << s->sps->log2_ctb_size;
2340 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
    /* Wait until the row above is far enough ahead (CABAC dependency). */
2342 ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
2344 if (avpriv_atomic_int_get(&s1->wpp_err)){
2345 ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2349 ff_hevc_cabac_init(s, ctb_addr_ts);
2350 hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2351 more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2353 if (more_data < 0) {
2354 s->tab_slice_address[ctb_addr_rs] = -1;
2360 ff_hevc_save_states(s, ctb_addr_ts);
2361 ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2362 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
    /* Row ended before its right edge: the stream is inconsistent;
     * flag the error so the other row threads bail out. */
2364 if (!more_data && (x_ctb+ctb_size) < s->sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2365 avpriv_atomic_int_set(&s1->wpp_err, 1);
2366 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2370 if ((x_ctb+ctb_size) >= s->sps->width && (y_ctb+ctb_size) >= s->sps->height ) {
2371 ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2372 ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2375 ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2378 if(x_ctb >= s->sps->width) {
2382 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
/* Decode the slice data using wavefront parallel processing: clone the
 * decoder context per thread, translate the slice-header entry-point
 * offsets into per-row (offset, size) pairs adjusted for emulation-
 * prevention bytes (skipped_bytes_pos), then run hls_decode_entry_wpp
 * once per CTB row via execute2. */
2387 static int hls_slice_data_wpp(HEVCContext *s, const uint8_t *nal, int length)
2389 HEVCLocalContext *lc = s->HEVClc;
    /* NOTE(review): these av_malloc_array() results are not checked in the
     * visible code before use — verify against the full function. */
2390 int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2391 int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2393 int startheader, cmpt = 0;
2398 ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
    /* One cloned HEVCContext + local context per additional thread.
     * NOTE(review): allocations unchecked in the visible code — confirm. */
2401 for (i = 1; i < s->threads_number; i++) {
2402 s->sList[i] = av_malloc(sizeof(HEVCContext));
2403 memcpy(s->sList[i], s, sizeof(HEVCContext));
2404 s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2405 s->sList[i]->HEVClc = s->HEVClcList[i];
2409 offset = (lc->gb.index >> 3);
    /* cmpt counts emulation-prevention bytes between offset and the next
     * entry point; entry_point_offset values refer to the escaped stream. */
2411 for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < s->skipped_bytes; j++) {
2412 if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2418 for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2419 offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2420 for (j = 0, cmpt = 0, startheader = offset
2421 + s->sh.entry_point_offset[i]; j < s->skipped_bytes; j++) {
2422 if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2427 s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2428 s->sh.offset[i - 1] = offset;
    /* Last row runs to the end of the NAL payload. */
2431 if (s->sh.num_entry_point_offsets != 0) {
2432 offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2433 s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2434 s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
    /* Re-sync the clones with the freshly parsed slice header state. */
2439 for (i = 1; i < s->threads_number; i++) {
2440 s->sList[i]->HEVClc->first_qp_group = 1;
2441 s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2442 memcpy(s->sList[i], s, sizeof(HEVCContext));
2443 s->sList[i]->HEVClc = s->HEVClcList[i];
2446 avpriv_atomic_int_set(&s->wpp_err, 0);
2447 ff_reset_entries(s->avctx);
2449 for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2454 if (s->pps->entropy_coding_sync_enabled_flag)
2455 s->avctx->execute2(s->avctx, (void *) hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2457 for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
2465 * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2466 * 0 if the unit should be skipped, 1 otherwise
2468 static int hls_nal_unit(HEVCContext *s)
2470 GetBitContext *gb = &s->HEVClc->gb;
    /* forbidden_zero_bit must be 0 in a valid NAL header. */
2473 if (get_bits1(gb) != 0)
2474 return AVERROR_INVALIDDATA;
2476 s->nal_unit_type = get_bits(gb, 6);
2478 nuh_layer_id = get_bits(gb, 6);
    /* nuh_temporal_id_plus1 is coded +1; 0 (i.e. -1 here) is invalid. */
2479 s->temporal_id = get_bits(gb, 3) - 1;
2480 if (s->temporal_id < 0)
2481 return AVERROR_INVALIDDATA;
2483 av_log(s->avctx, AV_LOG_DEBUG,
2484 "nal_unit_type: %d, nuh_layer_id: %d, temporal_id: %d\n",
2485 s->nal_unit_type, nuh_layer_id, s->temporal_id);
    /* Only the base layer (nuh_layer_id == 0) is decoded. */
2487 return nuh_layer_id == 0;
/* Attach SEI-derived side data to the output frame: stereo 3D packing
 * (from the frame-packing-arrangement SEI) and a display matrix (from the
 * display-orientation SEI).  Returns 0 or AVERROR(ENOMEM). */
2490 static int set_side_data(HEVCContext *s)
2492 AVFrame *out = s->ref->frame;
2494 if (s->sei_frame_packing_present &&
2495 s->frame_packing_arrangement_type >= 3 &&
2496 s->frame_packing_arrangement_type <= 5 &&
2497 s->content_interpretation_type > 0 &&
2498 s->content_interpretation_type < 3) {
2499 AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2501 return AVERROR(ENOMEM);
2503 switch (s->frame_packing_arrangement_type) {
2505 if (s->quincunx_subsampling)
2506 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2508 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2511 stereo->type = AV_STEREO3D_TOPBOTTOM;
2514 stereo->type = AV_STEREO3D_FRAMESEQUENCE;
    /* content_interpretation_type == 2 means right view first. */
2518 if (s->content_interpretation_type == 2)
2519 stereo->flags = AV_STEREO3D_FLAG_INVERT;
2522 if (s->sei_display_orientation_present &&
2523 (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
    /* anticlockwise_rotation is a 16-bit fixed-point fraction of 360°. */
2524 double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2525 AVFrameSideData *rotation = av_frame_new_side_data(out,
2526 AV_FRAME_DATA_DISPLAYMATRIX,
2527 sizeof(int32_t) * 9);
2529 return AVERROR(ENOMEM);
2531 av_display_rotation_set((int32_t *)rotation->data, angle);
2532 av_display_matrix_flip((int32_t *)rotation->data,
2533 s->sei_vflip, s->sei_hflip);
/* Begin decoding a new frame: clear the per-frame filter/metadata tables,
 * allocate the new reference frame, build the reference picture sets,
 * attach side data, and possibly bump a finished frame to the output
 * queue.  Returns 0 on success, a negative error code on failure. */
2539 static int hevc_frame_start(HEVCContext *s)
2541 HEVCLocalContext *lc = s->HEVClc;
2542 int pic_size_in_ctb = ((s->sps->width >> s->sps->log2_min_cb_size) + 1) *
2543 ((s->sps->height >> s->sps->log2_min_cb_size) + 1);
2546 memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2547 memset(s->vertical_bs, 0, s->bs_width * s->bs_height);
2548 memset(s->cbf_luma, 0, s->sps->min_tb_width * s->sps->min_tb_height);
2549 memset(s->is_pcm, 0, (s->sps->min_pu_width + 1) * (s->sps->min_pu_height + 1));
    /* -1 marks "no slice decoded yet" for every CTB. */
2550 memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2553 s->first_nal_type = s->nal_unit_type;
2555 if (s->pps->tiles_enabled_flag)
2556 lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2558 ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2562 ret = ff_hevc_frame_rps(s);
2564 av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2568 s->ref->frame->key_frame = IS_IRAP(s);
2570 ret = set_side_data(s);
2574 s->frame->pict_type = 3 - s->sh.slice_type;
2577 ff_hevc_bump_frame(s);
2579 av_frame_unref(s->output_frame);
2580 ret = ff_hevc_output_frame(s, s->output_frame, 0);
    /* Frame threading: the frame header is now fully set up. */
2584 ff_thread_finish_setup(s->avctx);
    /* Error path: report full progress so waiting threads don't deadlock. */
2589 if (s->ref && s->threads_type == FF_THREAD_FRAME)
2590 ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
/* Decode a single NAL unit: parse the NAL header, then dispatch on the
 * unit type (parameter sets, SEI, or a VCL slice).  For slices this
 * validates RASL/BLA ordering, starts the frame on the first slice,
 * builds reference lists and runs the slice data (WPP or serial).
 * Returns 0 on success; errors may be downgraded to warnings unless
 * AV_EF_EXPLODE is set. */
2595 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
2597 HEVCLocalContext *lc = s->HEVClc;
2598 GetBitContext *gb = &lc->gb;
2599 int ctb_addr_ts, ret;
2601 ret = init_get_bits8(gb, nal, length);
2605 ret = hls_nal_unit(s);
2607 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2613 switch (s->nal_unit_type) {
2615 ret = ff_hevc_decode_nal_vps(s);
2620 ret = ff_hevc_decode_nal_sps(s);
2625 ret = ff_hevc_decode_nal_pps(s);
2629 case NAL_SEI_PREFIX:
2630 case NAL_SEI_SUFFIX:
2631 ret = ff_hevc_decode_nal_sei(s);
2642 case NAL_BLA_W_RADL:
2644 case NAL_IDR_W_RADL:
2651 ret = hls_slice_header(s);
    /* After a seek (max_ra == INT_MAX) wait for a recovery point. */
2655 if (s->max_ra == INT_MAX) {
2656 if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2660 s->max_ra = INT_MIN;
    /* RASL pictures preceding the recovery point are not decodable. */
2664 if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2665 s->poc <= s->max_ra) {
2669 if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2670 s->max_ra = INT_MIN;
2673 if (s->sh.first_slice_in_pic_flag) {
2674 ret = hevc_frame_start(s);
2677 } else if (!s->ref) {
2678 av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
    /* All VCL NALUs of one picture must share the same NAL type. */
2682 if (s->nal_unit_type != s->first_nal_type) {
2683 av_log(s->avctx, AV_LOG_ERROR,
2684 "Non-matching NAL types of the VCL NALUs: %d %d\n",
2685 s->first_nal_type, s->nal_unit_type);
2686 return AVERROR_INVALIDDATA;
2689 if (!s->sh.dependent_slice_segment_flag &&
2690 s->sh.slice_type != I_SLICE) {
2691 ret = ff_hevc_slice_rpl(s);
2693 av_log(s->avctx, AV_LOG_WARNING,
2694 "Error constructing the reference lists for the current slice.\n");
    /* WPP needs >1 thread and entry points; otherwise decode serially. */
2699 if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2700 ctb_addr_ts = hls_slice_data_wpp(s, nal, length);
2702 ctb_addr_ts = hls_slice_data(s);
2703 if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2707 if (ctb_addr_ts < 0) {
    /* EOS/EOB: a new coded video sequence starts afterwards. */
2714 s->seq_decode = (s->seq_decode + 1) & 0xff;
2715 s->max_ra = INT_MAX;
2721 av_log(s->avctx, AV_LOG_INFO,
2722 "Skipping NAL unit %d\n", s->nal_unit_type);
2727 if (s->avctx->err_recognition & AV_EF_EXPLODE)
2732 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2733 * between these functions would be nice. */
/* Extract the RBSP from an escaped NAL unit: strip the 0x000003
 * emulation-prevention bytes into nal->rbsp_buffer and record each
 * removed byte's destination position in s->skipped_bytes_pos (needed to
 * translate entry-point offsets for WPP).  Returns the number of source
 * bytes consumed, or a negative error code. */
2734 int ff_hevc_extract_rbsp(HEVCContext *s, const uint8_t *src, int length,
2740 s->skipped_bytes = 0;
2741 #define STARTCODE_TEST \
2742 if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) { \
2743 if (src[i + 2] != 3) { \
2744 /* startcode, so we must be past the end */ \
2749 #if HAVE_FAST_UNALIGNED
2750 #define FIND_FIRST_ZERO \
2751 if (i > 0 && !src[i]) \
    /* SWAR scan: test 8 (or 4) bytes at a time for a zero byte. */
2756 for (i = 0; i + 1 < length; i += 9) {
2757 if (!((~AV_RN64A(src + i) &
2758 (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2759 0x8000800080008080ULL))
2766 for (i = 0; i + 1 < length; i += 5) {
2767 if (!((~AV_RN32A(src + i) &
2768 (AV_RN32A(src + i) - 0x01000101U)) &
2775 #endif /* HAVE_FAST_64BIT */
2777 for (i = 0; i + 1 < length; i += 2) {
2780 if (i > 0 && src[i - 1] == 0)
2784 #endif /* HAVE_FAST_UNALIGNED */
2786 if (i >= length - 1) { // no escaped 0
2792 av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2793 length + FF_INPUT_BUFFER_PADDING_SIZE);
2794 if (!nal->rbsp_buffer)
2795 return AVERROR(ENOMEM);
2797 dst = nal->rbsp_buffer;
    /* Bytes before the first potential escape can be copied verbatim. */
2799 memcpy(dst, src, i);
2801 while (si + 2 < length) {
2802 // remove escapes (very rare 1:2^22)
2803 if (src[si + 2] > 3) {
2804 dst[di++] = src[si++];
2805 dst[di++] = src[si++];
2806 } else if (src[si] == 0 && src[si + 1] == 0) {
2807 if (src[si + 2] == 3) { // escape
    /* Grow the skipped-byte position array on demand. */
2813 if (s->skipped_bytes_pos_size < s->skipped_bytes) {
2814 s->skipped_bytes_pos_size *= 2;
2815 av_reallocp_array(&s->skipped_bytes_pos,
2816 s->skipped_bytes_pos_size,
2817 sizeof(*s->skipped_bytes_pos));
2818 if (!s->skipped_bytes_pos)
2819 return AVERROR(ENOMEM);
2821 if (s->skipped_bytes_pos)
2822 s->skipped_bytes_pos[s->skipped_bytes-1] = di - 1;
2824 } else // next start code
2828 dst[di++] = src[si++];
2831 dst[di++] = src[si++];
2834 memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
/* Split the input packet into NAL units (length-prefixed for MP4-style
 * streams, start-code delimited for Annex B), unescape each one into
 * s->nals, then decode them in order.  Returns 0 on success or a
 * negative error code. */
2841 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2843 int i, consumed, ret = 0;
2846 s->last_eos = s->eos;
2849 /* split the input packet into NAL units, so we know the upper bound on the
2850 * number of slices in the frame */
2852 while (length >= 4) {
2854 int extract_length = 0;
    /* MP4-style: nal_length_size big-endian length prefix. */
2858 for (i = 0; i < s->nal_length_size; i++)
2859 extract_length = (extract_length << 8) | buf[i];
2860 buf += s->nal_length_size;
2861 length -= s->nal_length_size;
2863 if (extract_length > length) {
2864 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2865 ret = AVERROR_INVALIDDATA;
2869 /* search start code */
2870 while (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2874 av_log(s->avctx, AV_LOG_ERROR, "No start code is found.\n");
2875 ret = AVERROR_INVALIDDATA;
2885 extract_length = length;
    /* Grow the NAL array and the parallel skipped-bytes bookkeeping.
     * NOTE(review): the av_reallocp_array() returns on the three
     * skipped_bytes_* arrays are not checked in the visible code —
     * verify against the full function. */
2887 if (s->nals_allocated < s->nb_nals + 1) {
2888 int new_size = s->nals_allocated + 1;
2889 HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
2891 ret = AVERROR(ENOMEM);
2895 memset(s->nals + s->nals_allocated, 0,
2896 (new_size - s->nals_allocated) * sizeof(*tmp));
2897 av_reallocp_array(&s->skipped_bytes_nal, new_size, sizeof(*s->skipped_bytes_nal));
2898 av_reallocp_array(&s->skipped_bytes_pos_size_nal, new_size, sizeof(*s->skipped_bytes_pos_size_nal));
2899 av_reallocp_array(&s->skipped_bytes_pos_nal, new_size, sizeof(*s->skipped_bytes_pos_nal));
2900 s->skipped_bytes_pos_size_nal[s->nals_allocated] = 1024; // initial buffer size
2901 s->skipped_bytes_pos_nal[s->nals_allocated] = av_malloc_array(s->skipped_bytes_pos_size_nal[s->nals_allocated], sizeof(*s->skipped_bytes_pos));
2902 s->nals_allocated = new_size;
2904 s->skipped_bytes_pos_size = s->skipped_bytes_pos_size_nal[s->nb_nals];
2905 s->skipped_bytes_pos = s->skipped_bytes_pos_nal[s->nb_nals];
2906 nal = &s->nals[s->nb_nals];
2908 consumed = ff_hevc_extract_rbsp(s, buf, extract_length, nal);
    /* Save back the (possibly reallocated) per-NAL skip bookkeeping. */
2910 s->skipped_bytes_nal[s->nb_nals] = s->skipped_bytes;
2911 s->skipped_bytes_pos_size_nal[s->nb_nals] = s->skipped_bytes_pos_size;
2912 s->skipped_bytes_pos_nal[s->nb_nals++] = s->skipped_bytes_pos;
2920 ret = init_get_bits8(&s->HEVClc->gb, nal->data, nal->size);
    /* EOS/EOB terminate the access unit early. */
2925 if (s->nal_unit_type == NAL_EOB_NUT ||
2926 s->nal_unit_type == NAL_EOS_NUT)
2933 /* parse the NAL units */
2934 for (i = 0; i < s->nb_nals; i++) {
2936 s->skipped_bytes = s->skipped_bytes_nal[i];
2937 s->skipped_bytes_pos = s->skipped_bytes_pos_nal[i];
2939 ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
2941 av_log(s->avctx, AV_LOG_WARNING,
2942 "Error parsing NAL unit #%d.\n", i);
    /* Release frame-thread waiters even on the failure path. */
2948 if (s->ref && s->threads_type == FF_THREAD_FRAME)
2949 ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
/* Log a 16-byte MD5 digest as 32 lowercase hex characters. */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    int idx = 0;
    while (idx < 16) {
        av_log(log_ctx, level, "%02"PRIx8, md5[idx]);
        idx++;
    }
}
/* Verify the decoded frame against the MD5 checksums carried in the
 * picture-hash SEI (s->md5): hash each plane (byteswapping >8-bit
 * samples to little-endian first) and compare.  Returns 0 on match,
 * AVERROR_INVALIDDATA on mismatch, other negatives on setup errors. */
2961 static int verify_md5(HEVCContext *s, AVFrame *frame)
2963 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2968 return AVERROR(EINVAL);
2970 pixel_shift = desc->comp[0].depth_minus1 > 7;
2972 av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2975 /* the checksums are LE, so we have to byteswap for >8bpp formats
    /* Scratch buffer for the byteswapped row of >8bpp formats. */
2978 if (pixel_shift && !s->checksum_buf) {
2979 av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2980 FFMAX3(frame->linesize[0], frame->linesize[1],
2981 frame->linesize[2]));
2982 if (!s->checksum_buf)
2983 return AVERROR(ENOMEM);
2987 for (i = 0; frame->data[i]; i++) {
2988 int width = s->avctx->coded_width;
2989 int height = s->avctx->coded_height;
    /* Chroma planes (1, 2) are subsampled per the pixel format. */
2990 int w = (i == 1 || i == 2) ? (width >> desc->log2_chroma_w) : width;
2991 int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2994 av_md5_init(s->md5_ctx);
2995 for (j = 0; j < h; j++) {
2996 const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2999 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
3000 (const uint16_t *) src, w);
3001 src = s->checksum_buf;
3004 av_md5_update(s->md5_ctx, src, w << pixel_shift);
3006 av_md5_final(s->md5_ctx, md5);
3008 if (!memcmp(md5, s->md5[i], 16)) {
3009 av_log (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
3010 print_md5(s->avctx, AV_LOG_DEBUG, md5);
3011 av_log (s->avctx, AV_LOG_DEBUG, "; ");
3013 av_log (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
3014 print_md5(s->avctx, AV_LOG_ERROR, md5);
3015 av_log (s->avctx, AV_LOG_ERROR, " != ");
3016 print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
3017 av_log (s->avctx, AV_LOG_ERROR, "\n");
3018 return AVERROR_INVALIDDATA;
3022 av_log(s->avctx, AV_LOG_DEBUG, "\n");
/* Top-level AVCodec.decode callback: on a flush packet output pending
 * frames; otherwise decode the packet's NAL units, optionally verify the
 * SEI MD5 checksum, and return any frame ready for output via *data /
 * *got_output. */
3027 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
3031 HEVCContext *s = avctx->priv_data;
    /* Flush path: drain the delayed-output queue. */
3034 ret = ff_hevc_output_frame(s, data, 1);
3043 ret = decode_nal_units(s, avpkt->data, avpkt->size);
3047 /* verify the SEI checksum */
3048 if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
3050 ret = verify_md5(s, s->ref->frame);
    /* With AV_EF_EXPLODE a checksum mismatch discards the frame. */
3051 if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
3052 ff_hevc_unref_frame(s, s->ref, ~0);
3058 if (s->is_decoded) {
3059 av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
3063 if (s->output_frame->buf[0]) {
3064 av_frame_move_ref(data, s->output_frame);
/* Create a new reference to src in dst: ref the frame buffers and the
 * shared tab_mvf/rpl tables, and copy the plain metadata fields.
 * On any allocation failure the partially-built dst is unreferenced and
 * AVERROR(ENOMEM) is returned. */
3071 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
3075 ret = ff_thread_ref_frame(&dst->tf, &src->tf);
3079 dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
3080 if (!dst->tab_mvf_buf)
3082 dst->tab_mvf = src->tab_mvf;
3084 dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
3085 if (!dst->rpl_tab_buf)
3087 dst->rpl_tab = src->rpl_tab;
3089 dst->rpl_buf = av_buffer_ref(src->rpl_buf);
3093 dst->poc = src->poc;
3094 dst->ctb_count = src->ctb_count;
3095 dst->window = src->window;
3096 dst->flags = src->flags;
3097 dst->sequence = src->sequence;
    /* fail: drop whatever was referenced so far. */
3101 ff_hevc_unref_frame(s, dst, ~0);
3102 return AVERROR(ENOMEM);
/* AVCodec.close callback: release every allocation owned by the decoder
 * context — per-NAL bookkeeping, CABAC state, frames and the DPB,
 * parameter-set lists, slice-header arrays, per-thread contexts and NAL
 * buffers. */
3105 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3107 HEVCContext *s = avctx->priv_data;
3112 av_freep(&s->md5_ctx);
3114 for(i=0; i < s->nals_allocated; i++) {
3115 av_freep(&s->skipped_bytes_pos_nal[i]);
3117 av_freep(&s->skipped_bytes_pos_size_nal);
3118 av_freep(&s->skipped_bytes_nal);
3119 av_freep(&s->skipped_bytes_pos_nal);
3121 av_freep(&s->cabac_state);
3123 av_frame_free(&s->tmp_frame);
3124 av_frame_free(&s->output_frame);
3126 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3127 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3128 av_frame_free(&s->DPB[i].frame);
3131 for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
3132 av_buffer_unref(&s->vps_list[i]);
3133 for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
3134 av_buffer_unref(&s->sps_list[i]);
3135 for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
3136 av_buffer_unref(&s->pps_list[i]);
3141 av_buffer_unref(&s->current_sps);
3143 av_freep(&s->sh.entry_point_offset);
3144 av_freep(&s->sh.offset);
3145 av_freep(&s->sh.size);
    /* Per-thread contexts created for WPP (index 0 is the main context). */
3147 for (i = 1; i < s->threads_number; i++) {
3148 HEVCLocalContext *lc = s->HEVClcList[i];
3150 av_freep(&s->HEVClcList[i]);
3151 av_freep(&s->sList[i]);
    /* Avoid a dangling s->HEVClc after freeing HEVClcList[0]. */
3154 if (s->HEVClc == s->HEVClcList[0])
3156 av_freep(&s->HEVClcList[0]);
3158 for (i = 0; i < s->nals_allocated; i++)
3159 av_freep(&s->nals[i].rbsp_buffer);
3161 s->nals_allocated = 0;
/* Allocate the long-lived pieces of the decoder context: the primary
 * HEVCLocalContext, CABAC state, scratch/output frames, the DPB frame
 * shells and the MD5 context.  On any allocation failure control reaches
 * the shared tail, which tears everything down via hevc_decode_free()
 * and returns AVERROR(ENOMEM).
 * NOTE(review): the per-allocation "goto fail" checks are elided in this
 * excerpt; only some of the NULL tests remain visible. */
3166 static av_cold int hevc_init_context(AVCodecContext *avctx)
3168 HEVCContext *s = avctx->priv_data;
/* Main-thread local context; slot 0 of the slice-thread list aliases it. */
3173 s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3176 s->HEVClcList[0] = s->HEVClc;
3179 s->cabac_state = av_malloc(HEVC_CONTEXTS);
3180 if (!s->cabac_state)
3183 s->tmp_frame = av_frame_alloc();
3187 s->output_frame = av_frame_alloc();
3188 if (!s->output_frame)
/* One AVFrame shell per DPB slot; tf.f must point at the same frame for
 * the frame-threading helpers. */
3191 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3192 s->DPB[i].frame = av_frame_alloc();
3193 if (!s->DPB[i].frame)
3195 s->DPB[i].tf.f = s->DPB[i].frame;
3198 s->max_ra = INT_MAX;
3200 s->md5_ctx = av_md5_alloc();
3204 ff_bswapdsp_init(&s->bdsp);
3206 s->context_initialized = 1;
/* Shared failure path: free the partially-built context. */
3212 hevc_decode_free(avctx);
3213 return AVERROR(ENOMEM);
/* Frame-threading callback: synchronize this worker's context (dst/s)
 * with the source context (src/s0) after the latter finished parsing a
 * frame.  Re-references the DPB, the VPS/SPS/PPS lists and the active
 * SPS, then copies the plain decode-state fields.
 * NOTE(review): several lines (error checks, the "if (!s0->context_...)"
 * style guards, closing braces) are elided in this excerpt. */
3216 static int hevc_update_thread_context(AVCodecContext *dst,
3217 const AVCodecContext *src)
3219 HEVCContext *s = dst->priv_data;
3220 HEVCContext *s0 = src->priv_data;
/* Lazily build this thread's context on first use. */
3223 if (!s->context_initialized) {
3224 ret = hevc_init_context(dst);
/* Mirror the source DPB: drop our old refs, re-ref every occupied slot. */
3229 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3230 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3231 if (s0->DPB[i].frame->buf[0]) {
3232 ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3238 if (s->sps != s0->sps)
/* Parameter-set lists: replace each of our refs with a fresh ref on the
 * source's buffer (or leave it NULL if the source slot is empty). */
3240 for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3241 av_buffer_unref(&s->vps_list[i]);
3242 if (s0->vps_list[i]) {
3243 s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3244 if (!s->vps_list[i])
3245 return AVERROR(ENOMEM);
3249 for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3250 av_buffer_unref(&s->sps_list[i]);
3251 if (s0->sps_list[i]) {
3252 s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3253 if (!s->sps_list[i])
3254 return AVERROR(ENOMEM);
3258 for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3259 av_buffer_unref(&s->pps_list[i]);
3260 if (s0->pps_list[i]) {
3261 s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3262 if (!s->pps_list[i])
3263 return AVERROR(ENOMEM);
3267 av_buffer_unref(&s->current_sps);
3268 if (s0->current_sps) {
3269 s->current_sps = av_buffer_ref(s0->current_sps);
3270 if (!s->current_sps)
3271 return AVERROR(ENOMEM);
/* Re-run SPS-dependent (re)initialization only if the active SPS changed. */
3274 if (s->sps != s0->sps)
3275 if ((ret = set_sps(s, s0->sps)) < 0)
/* Plain-value decode state copied across threads. */
3278 s->seq_decode = s0->seq_decode;
3279 s->seq_output = s0->seq_output;
3280 s->pocTid0 = s0->pocTid0;
3281 s->max_ra = s0->max_ra;
3284 s->is_nalff = s0->is_nalff;
3285 s->nal_length_size = s0->nal_length_size;
3287 s->threads_number = s0->threads_number;
3288 s->threads_type = s0->threads_type;
/* Presumably taken on EOS/flush in the elided condition above: bump the
 * sequence counter and reset the RASL suppression threshold — TODO confirm
 * against the full source. */
3291 s->seq_decode = (s->seq_decode + 1) & 0xff;
3292 s->max_ra = INT_MAX;
/* Parse codec extradata at init time.  Two layouts are supported:
 * hvcC (HEVCDecoderConfigurationRecord, length-prefixed NAL units, as
 * found in MP4/Matroska) — detected by the first-bytes heuristic below —
 * and otherwise raw Annex-B NAL units fed straight to decode_nal_units().
 * NOTE(review): some error-check and closing-brace lines are elided in
 * this excerpt. */
3298 static int hevc_decode_extradata(HEVCContext *s)
3300 AVCodecContext *avctx = s->avctx;
3304 bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
/* Heuristic: Annex-B data starts with a 00 00 01/00 00 00 01 start code,
 * so a nonzero first/second byte or third byte > 1 indicates hvcC. */
3306 if (avctx->extradata_size > 3 &&
3307 (avctx->extradata[0] || avctx->extradata[1] ||
3308 avctx->extradata[2] > 1)) {
3309 /* It seems the extradata is encoded as hvcC format.
3310 * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3311 * is finalized. When finalized, configurationVersion will be 1 and we
3312 * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3313 int i, j, num_arrays, nal_len_size;
/* Skip the fixed hvcC header up to lengthSizeMinusOne, then read the
 * stream's NAL length-field size and the number of parameter-set arrays. */
3317 bytestream2_skip(&gb, 21);
3318 nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3319 num_arrays = bytestream2_get_byte(&gb);
3321 /* nal units in the hvcC always have length coded with 2 bytes,
3322 * so put a fake nal_length_size = 2 while parsing them */
3323 s->nal_length_size = 2;
3325 /* Decode nal units from hvcC. */
3326 for (i = 0; i < num_arrays; i++) {
3327 int type = bytestream2_get_byte(&gb) & 0x3f;
3328 int cnt = bytestream2_get_be16(&gb);
3330 for (j = 0; j < cnt; j++) {
3331 // +2 for the nal size field
3332 int nalsize = bytestream2_peek_be16(&gb) + 2;
/* Bounds check before handing the slice of extradata to the NAL parser. */
3333 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3334 av_log(s->avctx, AV_LOG_ERROR,
3335 "Invalid NAL unit size in extradata.\n");
3336 return AVERROR_INVALIDDATA;
3339 ret = decode_nal_units(s, gb.buffer, nalsize);
3341 av_log(avctx, AV_LOG_ERROR,
3342 "Decoding nal unit %d %d from hvcC failed\n",
3346 bytestream2_skip(&gb, nalsize);
3350 /* Now store right nal length size, that will be used to parse
3352 s->nal_length_size = nal_len_size;
/* Raw Annex-B path: decode the whole extradata blob directly. */
3355 ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
/* Codec init callback: build the context, pick the threading mode and
 * decode any container extradata (parameter sets) up front.
 * NOTE(review): some error-return and "else" lines are elided in this
 * excerpt. */
3362 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3364 HEVCContext *s = avctx->priv_data;
/* One-time global CABAC LUT setup (safe to call repeatedly). */
3367 ff_init_cabac_states();
3369 avctx->internal->allocate_progress = 1;
3371 ret = hevc_init_context(avctx);
3375 s->enable_parallel_tiles = 0;
3376 s->picture_struct = 0;
/* Slice threading uses the user thread count; otherwise single-threaded
 * slice decoding. */
3378 if(avctx->active_thread_type & FF_THREAD_SLICE)
3379 s->threads_number = avctx->thread_count;
3381 s->threads_number = 1;
/* Decode parameter sets shipped in extradata; a failure here aborts init
 * and frees the context. */
3383 if (avctx->extradata_size > 0 && avctx->extradata) {
3384 ret = hevc_decode_extradata(s);
3386 hevc_decode_free(avctx);
/* Prefer frame threading when enabled with more than one thread. */
3391 if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3392 s->threads_type = FF_THREAD_FRAME;
3394 s->threads_type = FF_THREAD_SLICE;
/* Frame-threading worker init: the context was memcpy'd from the main
 * thread, so wipe it and allocate a fresh one; shared state is pulled in
 * later by hevc_update_thread_context(). */
3399 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3401 HEVCContext *s = avctx->priv_data;
3404 memset(s, 0, sizeof(*s));
3406 ret = hevc_init_context(avctx);
/* Flush callback (seek): empty the DPB and reset the RASL suppression
 * threshold so decoding can restart cleanly at a new random-access point. */
3413 static void hevc_decode_flush(AVCodecContext *avctx)
3415 HEVCContext *s = avctx->priv_data;
3416 ff_hevc_flush_dpb(s);
3417 s->max_ra = INT_MAX;
/* Helpers for the AVOption table below: OFFSET() locates an option's
 * backing field inside HEVCContext; PAR marks it as a video decoding
 * parameter. */
3420 #define OFFSET(x) offsetof(HEVCContext, x)
3421 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
/* Profiles reported via AVCodec.profiles; FF_PROFILE_UNKNOWN terminates
 * the list. */
3423 static const AVProfile profiles[] = {
3424 { FF_PROFILE_HEVC_MAIN, "Main" },
3425 { FF_PROFILE_HEVC_MAIN_10, "Main 10" },
3426 { FF_PROFILE_HEVC_MAIN_STILL_PICTURE, "Main Still Picture" },
3427 { FF_PROFILE_HEVC_REXT, "Rext" },
3428 { FF_PROFILE_UNKNOWN },
/* User-visible decoder options.  Both entries write the same field
 * (apply_defdispwin); "strict-displaywin" is presumably kept as an alias
 * for compatibility — TODO confirm.  NOTE(review): the { NULL }
 * terminator line is elided in this excerpt. */
3431 static const AVOption options[] = {
3432 { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3433 AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3434 { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3435 AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
/* AVClass wiring the options table above into the generic AVOption
 * system (used for logging names and option lookup).  NOTE(review): the
 * .option field and closing brace are elided in this excerpt. */
3439 static const AVClass hevc_decoder_class = {
3440 .class_name = "HEVC decoder",
3441 .item_name = av_default_item_name,
3443 .version = LIBAVUTIL_VERSION_INT,
3446 AVCodec ff_hevc_decoder = {
3448 .long_name = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3449 .type = AVMEDIA_TYPE_VIDEO,
3450 .id = AV_CODEC_ID_HEVC,
3451 .priv_data_size = sizeof(HEVCContext),
3452 .priv_class = &hevc_decoder_class,
3453 .init = hevc_decode_init,
3454 .close = hevc_decode_free,
3455 .decode = hevc_decode_frame,
3456 .flush = hevc_decode_flush,
3457 .update_thread_context = hevc_update_thread_context,
3458 .init_thread_copy = hevc_init_thread_copy,
3459 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3460 CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
3461 .profiles = NULL_IF_CONFIG_SMALL(profiles),