4 * Copyright (C) 2012 - 2013 Guillaume Martres
5 * Copyright (C) 2012 - 2013 Mickael Raulet
6 * Copyright (C) 2012 - 2013 Gildas Cocherel
7 * Copyright (C) 2012 - 2013 Wassim Hamidouche
9 * This file is part of FFmpeg.
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/atomic.h"
27 #include "libavutil/attributes.h"
28 #include "libavutil/common.h"
29 #include "libavutil/display.h"
30 #include "libavutil/internal.h"
31 #include "libavutil/md5.h"
32 #include "libavutil/opt.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/stereo3d.h"
37 #include "bytestream.h"
38 #include "cabac_functions.h"
/**
 * Lookup table mapping a prediction-block width in pixels (2..64) to a
 * compact function-table index (presumably indexing the pel/weighted
 * prediction DSP function arrays — only the listed widths are valid;
 * all other entries remain zero-initialized).
 */
const uint8_t ff_hevc_pel_weight[65] = {
    [2]  = 0,
    [4]  = 1,
    [6]  = 2,
    [8]  = 3,
    [12] = 4,
    [16] = 5,
    [24] = 6,
    [32] = 7,
    [48] = 8,
    [64] = 9,
};
45 * NOTE: Each function hls_foo correspond to the function foo in the
46 * specification (HLS stands for High Level Syntax).
53 /* free everything allocated by pic_arrays_init() */
54 static void pic_arrays_free(HEVCContext *s)
57 av_freep(&s->deblock);
59 av_freep(&s->skip_flag);
60 av_freep(&s->tab_ct_depth);
62 av_freep(&s->tab_ipm);
63 av_freep(&s->cbf_luma);
66 av_freep(&s->qp_y_tab);
67 av_freep(&s->tab_slice_address);
68 av_freep(&s->filter_slice_edges);
70 av_freep(&s->horizontal_bs);
71 av_freep(&s->vertical_bs);
73 av_freep(&s->sh.entry_point_offset);
74 av_freep(&s->sh.size);
75 av_freep(&s->sh.offset);
77 av_buffer_pool_uninit(&s->tab_mvf_pool);
78 av_buffer_pool_uninit(&s->rpl_tab_pool);
81 /* allocate arrays that depend on frame dimensions */
82 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
84 int log2_min_cb_size = sps->log2_min_cb_size;
85 int width = sps->width;
86 int height = sps->height;
87 int pic_size_in_ctb = ((width >> log2_min_cb_size) + 1) *
88 ((height >> log2_min_cb_size) + 1);
89 int ctb_count = sps->ctb_width * sps->ctb_height;
90 int min_pu_size = sps->min_pu_width * sps->min_pu_height;
92 s->bs_width = (width >> 2) + 1;
93 s->bs_height = (height >> 2) + 1;
95 s->sao = av_mallocz_array(ctb_count, sizeof(*s->sao));
96 s->deblock = av_mallocz_array(ctb_count, sizeof(*s->deblock));
97 if (!s->sao || !s->deblock)
100 s->skip_flag = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
101 s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
102 if (!s->skip_flag || !s->tab_ct_depth)
105 s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
106 s->tab_ipm = av_mallocz(min_pu_size);
107 s->is_pcm = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
108 if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
111 s->filter_slice_edges = av_mallocz(ctb_count);
112 s->tab_slice_address = av_malloc_array(pic_size_in_ctb,
113 sizeof(*s->tab_slice_address));
114 s->qp_y_tab = av_malloc_array(pic_size_in_ctb,
115 sizeof(*s->qp_y_tab));
116 if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
119 s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
120 s->vertical_bs = av_mallocz_array(s->bs_width, s->bs_height);
121 if (!s->horizontal_bs || !s->vertical_bs)
124 s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
126 s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
128 if (!s->tab_mvf_pool || !s->rpl_tab_pool)
135 return AVERROR(ENOMEM);
138 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
142 uint8_t luma_weight_l0_flag[16];
143 uint8_t chroma_weight_l0_flag[16];
144 uint8_t luma_weight_l1_flag[16];
145 uint8_t chroma_weight_l1_flag[16];
146 int luma_log2_weight_denom;
148 luma_log2_weight_denom = get_ue_golomb_long(gb);
149 if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7)
150 av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
151 s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
152 if (s->ps.sps->chroma_format_idc != 0) {
153 int delta = get_se_golomb(gb);
154 s->sh.chroma_log2_weight_denom = av_clip_uintp2(s->sh.luma_log2_weight_denom + delta, 3);
157 for (i = 0; i < s->sh.nb_refs[L0]; i++) {
158 luma_weight_l0_flag[i] = get_bits1(gb);
159 if (!luma_weight_l0_flag[i]) {
160 s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
161 s->sh.luma_offset_l0[i] = 0;
164 if (s->ps.sps->chroma_format_idc != 0) {
165 for (i = 0; i < s->sh.nb_refs[L0]; i++)
166 chroma_weight_l0_flag[i] = get_bits1(gb);
168 for (i = 0; i < s->sh.nb_refs[L0]; i++)
169 chroma_weight_l0_flag[i] = 0;
171 for (i = 0; i < s->sh.nb_refs[L0]; i++) {
172 if (luma_weight_l0_flag[i]) {
173 int delta_luma_weight_l0 = get_se_golomb(gb);
174 s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
175 s->sh.luma_offset_l0[i] = get_se_golomb(gb);
177 if (chroma_weight_l0_flag[i]) {
178 for (j = 0; j < 2; j++) {
179 int delta_chroma_weight_l0 = get_se_golomb(gb);
180 int delta_chroma_offset_l0 = get_se_golomb(gb);
181 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
182 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
183 >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
186 s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
187 s->sh.chroma_offset_l0[i][0] = 0;
188 s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
189 s->sh.chroma_offset_l0[i][1] = 0;
192 if (s->sh.slice_type == B_SLICE) {
193 for (i = 0; i < s->sh.nb_refs[L1]; i++) {
194 luma_weight_l1_flag[i] = get_bits1(gb);
195 if (!luma_weight_l1_flag[i]) {
196 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
197 s->sh.luma_offset_l1[i] = 0;
200 if (s->ps.sps->chroma_format_idc != 0) {
201 for (i = 0; i < s->sh.nb_refs[L1]; i++)
202 chroma_weight_l1_flag[i] = get_bits1(gb);
204 for (i = 0; i < s->sh.nb_refs[L1]; i++)
205 chroma_weight_l1_flag[i] = 0;
207 for (i = 0; i < s->sh.nb_refs[L1]; i++) {
208 if (luma_weight_l1_flag[i]) {
209 int delta_luma_weight_l1 = get_se_golomb(gb);
210 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
211 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
213 if (chroma_weight_l1_flag[i]) {
214 for (j = 0; j < 2; j++) {
215 int delta_chroma_weight_l1 = get_se_golomb(gb);
216 int delta_chroma_offset_l1 = get_se_golomb(gb);
217 s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
218 s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
219 >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
222 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
223 s->sh.chroma_offset_l1[i][0] = 0;
224 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
225 s->sh.chroma_offset_l1[i][1] = 0;
231 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
233 const HEVCSPS *sps = s->ps.sps;
234 int max_poc_lsb = 1 << sps->log2_max_poc_lsb;
235 int prev_delta_msb = 0;
236 unsigned int nb_sps = 0, nb_sh;
240 if (!sps->long_term_ref_pics_present_flag)
243 if (sps->num_long_term_ref_pics_sps > 0)
244 nb_sps = get_ue_golomb_long(gb);
245 nb_sh = get_ue_golomb_long(gb);
247 if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
248 return AVERROR_INVALIDDATA;
250 rps->nb_refs = nb_sh + nb_sps;
252 for (i = 0; i < rps->nb_refs; i++) {
253 uint8_t delta_poc_msb_present;
256 uint8_t lt_idx_sps = 0;
258 if (sps->num_long_term_ref_pics_sps > 1)
259 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
261 rps->poc[i] = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
262 rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
264 rps->poc[i] = get_bits(gb, sps->log2_max_poc_lsb);
265 rps->used[i] = get_bits1(gb);
268 delta_poc_msb_present = get_bits1(gb);
269 if (delta_poc_msb_present) {
270 int delta = get_ue_golomb_long(gb);
272 if (i && i != nb_sps)
273 delta += prev_delta_msb;
275 rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
276 prev_delta_msb = delta;
283 static void export_stream_params(AVCodecContext *avctx, const HEVCParamSets *ps,
286 const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
287 unsigned int num = 0, den = 0;
289 avctx->pix_fmt = sps->pix_fmt;
290 avctx->coded_width = sps->width;
291 avctx->coded_height = sps->height;
292 avctx->width = sps->output_width;
293 avctx->height = sps->output_height;
294 avctx->has_b_frames = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
295 avctx->profile = sps->ptl.general_ptl.profile_idc;
296 avctx->level = sps->ptl.general_ptl.level_idc;
298 ff_set_sar(avctx, sps->vui.sar);
300 if (sps->vui.video_signal_type_present_flag)
301 avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
304 avctx->color_range = AVCOL_RANGE_MPEG;
306 if (sps->vui.colour_description_present_flag) {
307 avctx->color_primaries = sps->vui.colour_primaries;
308 avctx->color_trc = sps->vui.transfer_characteristic;
309 avctx->colorspace = sps->vui.matrix_coeffs;
311 avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
312 avctx->color_trc = AVCOL_TRC_UNSPECIFIED;
313 avctx->colorspace = AVCOL_SPC_UNSPECIFIED;
316 if (vps->vps_timing_info_present_flag) {
317 num = vps->vps_num_units_in_tick;
318 den = vps->vps_time_scale;
319 } else if (sps->vui.vui_timing_info_present_flag) {
320 num = sps->vui.vui_num_units_in_tick;
321 den = sps->vui.vui_time_scale;
324 if (num != 0 && den != 0)
325 av_reduce(&avctx->framerate.den, &avctx->framerate.num,
329 static int set_sps(HEVCContext *s, const HEVCSPS *sps, enum AVPixelFormat pix_fmt)
331 #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL + CONFIG_HEVC_VDPAU_HWACCEL)
332 enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
342 ret = pic_arrays_init(s, sps);
346 export_stream_params(s->avctx, &s->ps, sps);
348 if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
349 #if CONFIG_HEVC_DXVA2_HWACCEL
350 *fmt++ = AV_PIX_FMT_DXVA2_VLD;
352 #if CONFIG_HEVC_D3D11VA_HWACCEL
353 *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
355 #if CONFIG_HEVC_VDPAU_HWACCEL
356 *fmt++ = AV_PIX_FMT_VDPAU;
360 if (pix_fmt == AV_PIX_FMT_NONE) {
361 *fmt++ = sps->pix_fmt;
362 *fmt = AV_PIX_FMT_NONE;
364 ret = ff_thread_get_format(s->avctx, pix_fmts);
367 s->avctx->pix_fmt = ret;
370 s->avctx->pix_fmt = pix_fmt;
373 ff_hevc_pred_init(&s->hpc, sps->bit_depth);
374 ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
375 ff_videodsp_init (&s->vdsp, sps->bit_depth);
377 for (i = 0; i < 3; i++) {
378 av_freep(&s->sao_pixel_buffer_h[i]);
379 av_freep(&s->sao_pixel_buffer_v[i]);
382 if (sps->sao_enabled && !s->avctx->hwaccel) {
383 int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
386 for(c_idx = 0; c_idx < c_count; c_idx++) {
387 int w = sps->width >> sps->hshift[c_idx];
388 int h = sps->height >> sps->vshift[c_idx];
389 s->sao_pixel_buffer_h[c_idx] =
390 av_malloc((w * 2 * sps->ctb_height) <<
392 s->sao_pixel_buffer_v[c_idx] =
393 av_malloc((h * 2 * sps->ctb_width) <<
399 s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
/**
 * Parse a slice segment header (HLS: slice_segment_header()).
 * Fills s->sh and activates the PPS/SPS referenced by the slice.
 * Returns 0 on success or a negative AVERROR on invalid data.
 * NOTE(review): this span is a garbled extraction — brace/else lines and
 * some statements are missing from the visible text; the code below is
 * kept byte-identical and only comments were added.
 */
409 static int hls_slice_header(HEVCContext *s)
411 GetBitContext *gb = &s->HEVClc->gb;
412 SliceHeader *sh = &s->sh;
/* On the first slice of an IDR/BLA picture, bump the sequence counter and
 * flush the reference picture set. */
416 sh->first_slice_in_pic_flag = get_bits1(gb);
417 if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
418 s->seq_decode = (s->seq_decode + 1) & 0xff;
421 ff_hevc_clear_refs(s);
423 sh->no_output_of_prior_pics_flag = 0;
425 sh->no_output_of_prior_pics_flag = get_bits1(gb);
/* Resolve and activate the PPS; a PPS change between slices of the same
 * picture is an error. */
427 sh->pps_id = get_ue_golomb_long(gb);
428 if (sh->pps_id >= MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
429 av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
430 return AVERROR_INVALIDDATA;
432 if (!sh->first_slice_in_pic_flag &&
433 s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
434 av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
435 return AVERROR_INVALIDDATA;
437 s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
438 if (s->nal_unit_type == NAL_CRA_NUT && s->last_eos == 1)
439 sh->no_output_of_prior_pics_flag = 1;
/* SPS switch: re-activate via set_sps() and start a new sequence. */
441 if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
442 const HEVCSPS* last_sps = s->ps.sps;
443 s->ps.sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
444 if (last_sps && IS_IRAP(s) && s->nal_unit_type != NAL_CRA_NUT) {
445 if (s->ps.sps->width != last_sps->width || s->ps.sps->height != last_sps->height ||
446 s->ps.sps->temporal_layer[s->ps.sps->max_sub_layers - 1].max_dec_pic_buffering !=
447 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
448 sh->no_output_of_prior_pics_flag = 0;
450 ff_hevc_clear_refs(s);
451 ret = set_sps(s, s->ps.sps, AV_PIX_FMT_NONE);
455 s->seq_decode = (s->seq_decode + 1) & 0xff;
/* Non-first slice segments carry an explicit CTB address (and possibly a
 * dependent-slice-segment flag). */
459 sh->dependent_slice_segment_flag = 0;
460 if (!sh->first_slice_in_pic_flag) {
461 int slice_address_length;
463 if (s->ps.pps->dependent_slice_segments_enabled_flag)
464 sh->dependent_slice_segment_flag = get_bits1(gb);
466 slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
467 s->ps.sps->ctb_height);
468 sh->slice_segment_addr = slice_address_length ? get_bits(gb, slice_address_length) : 0;
469 if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
470 av_log(s->avctx, AV_LOG_ERROR,
471 "Invalid slice segment address: %u.\n",
472 sh->slice_segment_addr);
473 return AVERROR_INVALIDDATA;
476 if (!sh->dependent_slice_segment_flag) {
477 sh->slice_addr = sh->slice_segment_addr;
481 sh->slice_segment_addr = sh->slice_addr = 0;
483 s->slice_initialized = 0;
/* Independent slice segment: parse the full header. */
486 if (!sh->dependent_slice_segment_flag) {
487 s->slice_initialized = 0;
489 for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
490 skip_bits(gb, 1); // slice_reserved_undetermined_flag[]
492 sh->slice_type = get_ue_golomb_long(gb);
493 if (!(sh->slice_type == I_SLICE ||
494 sh->slice_type == P_SLICE ||
495 sh->slice_type == B_SLICE)) {
496 av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
498 return AVERROR_INVALIDDATA;
500 if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
501 av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
502 return AVERROR_INVALIDDATA;
505 // when flag is not present, picture is inferred to be output
506 sh->pic_output_flag = 1;
507 if (s->ps.pps->output_flag_present_flag)
508 sh->pic_output_flag = get_bits1(gb);
510 if (s->ps.sps->separate_colour_plane_flag)
511 sh->colour_plane_id = get_bits(gb, 2);
/* Picture order count: all slices of one picture must agree on the POC. */
516 sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
517 poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
518 if (!sh->first_slice_in_pic_flag && poc != s->poc) {
519 av_log(s->avctx, AV_LOG_WARNING,
520 "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
521 if (s->avctx->err_recognition & AV_EF_EXPLODE)
522 return AVERROR_INVALIDDATA;
/* Short-term RPS: either coded in the slice header or selected from the
 * SPS-signalled candidate sets. */
527 sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
528 pos = get_bits_left(gb);
529 if (!sh->short_term_ref_pic_set_sps_flag) {
530 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
534 sh->short_term_rps = &sh->slice_rps;
536 int numbits, rps_idx;
538 if (!s->ps.sps->nb_st_rps) {
539 av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
540 return AVERROR_INVALIDDATA;
543 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
544 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
545 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
547 sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
/* Long-term RPS; only EXPLODE mode makes an invalid set fatal. */
549 pos = get_bits_left(gb);
550 ret = decode_lt_rps(s, &sh->long_term_rps, gb);
552 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
553 if (s->avctx->err_recognition & AV_EF_EXPLODE)
554 return AVERROR_INVALIDDATA;
556 sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
558 if (s->ps.sps->sps_temporal_mvp_enabled_flag)
559 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
561 sh->slice_temporal_mvp_enabled_flag = 0;
563 s->sh.short_term_rps = NULL;
/* Sub-layer non-reference NAL types never update the POC state. */
568 if (s->temporal_id == 0 &&
569 s->nal_unit_type != NAL_TRAIL_N &&
570 s->nal_unit_type != NAL_TSA_N &&
571 s->nal_unit_type != NAL_STSA_N &&
572 s->nal_unit_type != NAL_RADL_N &&
573 s->nal_unit_type != NAL_RADL_R &&
574 s->nal_unit_type != NAL_RASL_N &&
575 s->nal_unit_type != NAL_RASL_R)
/* SAO on/off per component; one flag covers both chroma planes. */
578 if (s->ps.sps->sao_enabled) {
579 sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
580 if (s->ps.sps->chroma_format_idc) {
581 sh->slice_sample_adaptive_offset_flag[1] =
582 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
585 sh->slice_sample_adaptive_offset_flag[0] = 0;
586 sh->slice_sample_adaptive_offset_flag[1] = 0;
587 sh->slice_sample_adaptive_offset_flag[2] = 0;
/* Active reference counts, optional RPL modification, merge candidates,
 * weighted prediction — P/B slices only. */
590 sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
591 if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
594 sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
595 if (sh->slice_type == B_SLICE)
596 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
598 if (get_bits1(gb)) { // num_ref_idx_active_override_flag
599 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
600 if (sh->slice_type == B_SLICE)
601 sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
603 if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
604 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
605 sh->nb_refs[L0], sh->nb_refs[L1]);
606 return AVERROR_INVALIDDATA;
609 sh->rpl_modification_flag[0] = 0;
610 sh->rpl_modification_flag[1] = 0;
611 nb_refs = ff_hevc_frame_nb_refs(s);
613 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
614 return AVERROR_INVALIDDATA;
617 if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
618 sh->rpl_modification_flag[0] = get_bits1(gb);
619 if (sh->rpl_modification_flag[0]) {
620 for (i = 0; i < sh->nb_refs[L0]; i++)
621 sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
624 if (sh->slice_type == B_SLICE) {
625 sh->rpl_modification_flag[1] = get_bits1(gb);
626 if (sh->rpl_modification_flag[1] == 1)
627 for (i = 0; i < sh->nb_refs[L1]; i++)
628 sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
632 if (sh->slice_type == B_SLICE)
633 sh->mvd_l1_zero_flag = get_bits1(gb);
635 if (s->ps.pps->cabac_init_present_flag)
636 sh->cabac_init_flag = get_bits1(gb);
638 sh->cabac_init_flag = 0;
/* Collocated picture selection for temporal MVP. */
640 sh->collocated_ref_idx = 0;
641 if (sh->slice_temporal_mvp_enabled_flag) {
642 sh->collocated_list = L0;
643 if (sh->slice_type == B_SLICE)
644 sh->collocated_list = !get_bits1(gb);
646 if (sh->nb_refs[sh->collocated_list] > 1) {
647 sh->collocated_ref_idx = get_ue_golomb_long(gb);
648 if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
649 av_log(s->avctx, AV_LOG_ERROR,
650 "Invalid collocated_ref_idx: %d.\n",
651 sh->collocated_ref_idx);
652 return AVERROR_INVALIDDATA;
657 if ((s->ps.pps->weighted_pred_flag && sh->slice_type == P_SLICE) ||
658 (s->ps.pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
659 pred_weight_table(s, gb);
662 sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
663 if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
664 av_log(s->avctx, AV_LOG_ERROR,
665 "Invalid number of merging MVP candidates: %d.\n",
666 sh->max_num_merge_cand);
667 return AVERROR_INVALIDDATA;
/* QP deltas and per-slice chroma QP offsets. */
671 sh->slice_qp_delta = get_se_golomb(gb);
673 if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
674 sh->slice_cb_qp_offset = get_se_golomb(gb);
675 sh->slice_cr_qp_offset = get_se_golomb(gb);
677 sh->slice_cb_qp_offset = 0;
678 sh->slice_cr_qp_offset = 0;
681 if (s->ps.pps->chroma_qp_offset_list_enabled_flag)
682 sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
684 sh->cu_chroma_qp_offset_enabled_flag = 0;
/* Deblocking filter overrides; defaults come from the PPS. */
686 if (s->ps.pps->deblocking_filter_control_present_flag) {
687 int deblocking_filter_override_flag = 0;
689 if (s->ps.pps->deblocking_filter_override_enabled_flag)
690 deblocking_filter_override_flag = get_bits1(gb);
692 if (deblocking_filter_override_flag) {
693 sh->disable_deblocking_filter_flag = get_bits1(gb);
694 if (!sh->disable_deblocking_filter_flag) {
695 sh->beta_offset = get_se_golomb(gb) * 2;
696 sh->tc_offset = get_se_golomb(gb) * 2;
699 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
700 sh->beta_offset = s->ps.pps->beta_offset;
701 sh->tc_offset = s->ps.pps->tc_offset;
704 sh->disable_deblocking_filter_flag = 0;
709 if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
710 (sh->slice_sample_adaptive_offset_flag[0] ||
711 sh->slice_sample_adaptive_offset_flag[1] ||
712 !sh->disable_deblocking_filter_flag)) {
713 sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
715 sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
717 } else if (!s->slice_initialized) {
718 av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
719 return AVERROR_INVALIDDATA;
/* Entry point offsets for tiles / WPP substreams. */
722 sh->num_entry_point_offsets = 0;
723 if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
724 unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
725 // It would be possible to bound this tighter but this here is simpler
726 if (num_entry_point_offsets > get_bits_left(gb)) {
727 av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
728 return AVERROR_INVALIDDATA;
731 sh->num_entry_point_offsets = num_entry_point_offsets;
732 if (sh->num_entry_point_offsets > 0) {
733 int offset_len = get_ue_golomb_long(gb) + 1;
735 if (offset_len < 1 || offset_len > 32) {
736 sh->num_entry_point_offsets = 0;
737 av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
738 return AVERROR_INVALIDDATA;
741 av_freep(&sh->entry_point_offset);
742 av_freep(&sh->offset);
744 sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
745 sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
746 sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
747 if (!sh->entry_point_offset || !sh->offset || !sh->size) {
748 sh->num_entry_point_offsets = 0;
749 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
750 return AVERROR(ENOMEM);
752 for (i = 0; i < sh->num_entry_point_offsets; i++) {
753 unsigned val = get_bits_long(gb, offset_len);
754 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
756 if (s->threads_number > 1 && (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1)) {
757 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
758 s->threads_number = 1;
760 s->enable_parallel_tiles = 0;
762 s->enable_parallel_tiles = 0;
/* Skip any slice header extension bytes. */
765 if (s->ps.pps->slice_header_extension_present_flag) {
766 unsigned int length = get_ue_golomb_long(gb);
767 if (length*8LL > get_bits_left(gb)) {
768 av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
769 return AVERROR_INVALIDDATA;
771 for (i = 0; i < length; i++)
772 skip_bits(gb, 8); // slice_header_extension_data_byte
775 // Inferred parameters
776 sh->slice_qp = 26U + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
777 if (sh->slice_qp > 51 ||
778 sh->slice_qp < -s->ps.sps->qp_bd_offset) {
779 av_log(s->avctx, AV_LOG_ERROR,
780 "The slice_qp %d is outside the valid range "
783 -s->ps.sps->qp_bd_offset);
784 return AVERROR_INVALIDDATA;
787 sh->slice_ctb_addr_rs = sh->slice_segment_addr;
789 if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
790 av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
791 return AVERROR_INVALIDDATA;
/* Final sanity check: the header must not overread the NAL payload. */
794 if (get_bits_left(gb) < 0) {
795 av_log(s->avctx, AV_LOG_ERROR,
796 "Overread slice header by %d bits\n", -get_bits_left(gb));
797 return AVERROR_INVALIDDATA;
800 s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
802 if (!s->ps.pps->cu_qp_delta_enabled_flag)
803 s->HEVClc->qp_y = s->sh.slice_qp;
805 s->slice_initialized = 1;
806 s->HEVClc->tu.cu_qp_offset_cb = 0;
807 s->HEVClc->tu.cu_qp_offset_cr = 0;
/* CTB(tab, x, y): index into a per-CTB table laid out row-major with
 * ctb_width entries per row. */
812 #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
/* SET_SAO(elem, value): store a freshly decoded SAO element, or copy it
 * from the left / upper neighbour CTB when the corresponding sao_merge
 * flag is set. Relies on locals sao_merge_left_flag, sao_merge_up_flag,
 * sao, rx and ry being in scope at the expansion site. */
814 #define SET_SAO(elem, value) \
816 if (!sao_merge_up_flag && !sao_merge_left_flag) \
818 else if (sao_merge_left_flag) \
819 sao->elem = CTB(s->sao, rx-1, ry).elem; \
820 else if (sao_merge_up_flag) \
821 sao->elem = CTB(s->sao, rx, ry-1).elem; \
826 static void hls_sao_param(HEVCContext *s, int rx, int ry)
828 HEVCLocalContext *lc = s->HEVClc;
829 int sao_merge_left_flag = 0;
830 int sao_merge_up_flag = 0;
831 SAOParams *sao = &CTB(s->sao, rx, ry);
834 if (s->sh.slice_sample_adaptive_offset_flag[0] ||
835 s->sh.slice_sample_adaptive_offset_flag[1]) {
837 if (lc->ctb_left_flag)
838 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
840 if (ry > 0 && !sao_merge_left_flag) {
842 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
846 for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
847 int log2_sao_offset_scale = c_idx == 0 ? s->ps.pps->log2_sao_offset_scale_luma :
848 s->ps.pps->log2_sao_offset_scale_chroma;
850 if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
851 sao->type_idx[c_idx] = SAO_NOT_APPLIED;
856 sao->type_idx[2] = sao->type_idx[1];
857 sao->eo_class[2] = sao->eo_class[1];
859 SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
862 if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
865 for (i = 0; i < 4; i++)
866 SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
868 if (sao->type_idx[c_idx] == SAO_BAND) {
869 for (i = 0; i < 4; i++) {
870 if (sao->offset_abs[c_idx][i]) {
871 SET_SAO(offset_sign[c_idx][i],
872 ff_hevc_sao_offset_sign_decode(s));
874 sao->offset_sign[c_idx][i] = 0;
877 SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
878 } else if (c_idx != 2) {
879 SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
882 // Inferred parameters
883 sao->offset_val[c_idx][0] = 0;
884 for (i = 0; i < 4; i++) {
885 sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
886 if (sao->type_idx[c_idx] == SAO_EDGE) {
888 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
889 } else if (sao->offset_sign[c_idx][i]) {
890 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
892 sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale;
900 static int hls_cross_component_pred(HEVCContext *s, int idx) {
901 HEVCLocalContext *lc = s->HEVClc;
902 int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
904 if (log2_res_scale_abs_plus1 != 0) {
905 int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
906 lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
907 (1 - 2 * res_scale_sign_flag);
909 lc->tu.res_scale_val = 0;
916 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
917 int xBase, int yBase, int cb_xBase, int cb_yBase,
918 int log2_cb_size, int log2_trafo_size,
919 int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
921 HEVCLocalContext *lc = s->HEVClc;
922 const int log2_trafo_size_c = log2_trafo_size - s->ps.sps->hshift[1];
925 if (lc->cu.pred_mode == MODE_INTRA) {
926 int trafo_size = 1 << log2_trafo_size;
927 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
929 s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
932 if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
933 (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
934 int scan_idx = SCAN_DIAG;
935 int scan_idx_c = SCAN_DIAG;
936 int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
937 (s->ps.sps->chroma_format_idc == 2 &&
938 (cbf_cb[1] || cbf_cr[1]));
940 if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
941 lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
942 if (lc->tu.cu_qp_delta != 0)
943 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
944 lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
945 lc->tu.is_cu_qp_delta_coded = 1;
947 if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
948 lc->tu.cu_qp_delta > (25 + s->ps.sps->qp_bd_offset / 2)) {
949 av_log(s->avctx, AV_LOG_ERROR,
950 "The cu_qp_delta %d is outside the valid range "
953 -(26 + s->ps.sps->qp_bd_offset / 2),
954 (25 + s->ps.sps->qp_bd_offset / 2));
955 return AVERROR_INVALIDDATA;
958 ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
961 if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
962 !lc->cu.cu_transquant_bypass_flag && !lc->tu.is_cu_chroma_qp_offset_coded) {
963 int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
964 if (cu_chroma_qp_offset_flag) {
965 int cu_chroma_qp_offset_idx = 0;
966 if (s->ps.pps->chroma_qp_offset_list_len_minus1 > 0) {
967 cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
968 av_log(s->avctx, AV_LOG_ERROR,
969 "cu_chroma_qp_offset_idx not yet tested.\n");
971 lc->tu.cu_qp_offset_cb = s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
972 lc->tu.cu_qp_offset_cr = s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
974 lc->tu.cu_qp_offset_cb = 0;
975 lc->tu.cu_qp_offset_cr = 0;
977 lc->tu.is_cu_chroma_qp_offset_coded = 1;
980 if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
981 if (lc->tu.intra_pred_mode >= 6 &&
982 lc->tu.intra_pred_mode <= 14) {
983 scan_idx = SCAN_VERT;
984 } else if (lc->tu.intra_pred_mode >= 22 &&
985 lc->tu.intra_pred_mode <= 30) {
986 scan_idx = SCAN_HORIZ;
989 if (lc->tu.intra_pred_mode_c >= 6 &&
990 lc->tu.intra_pred_mode_c <= 14) {
991 scan_idx_c = SCAN_VERT;
992 } else if (lc->tu.intra_pred_mode_c >= 22 &&
993 lc->tu.intra_pred_mode_c <= 30) {
994 scan_idx_c = SCAN_HORIZ;
1001 ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1002 if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1003 int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1004 int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1005 lc->tu.cross_pf = (s->ps.pps->cross_component_prediction_enabled_flag && cbf_luma &&
1006 (lc->cu.pred_mode == MODE_INTER ||
1007 (lc->tu.chroma_mode_c == 4)));
1009 if (lc->tu.cross_pf) {
1010 hls_cross_component_pred(s, 0);
1012 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1013 if (lc->cu.pred_mode == MODE_INTRA) {
1014 ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1015 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
1018 ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1019 log2_trafo_size_c, scan_idx_c, 1);
1021 if (lc->tu.cross_pf) {
1022 ptrdiff_t stride = s->frame->linesize[1];
1023 int hshift = s->ps.sps->hshift[1];
1024 int vshift = s->ps.sps->vshift[1];
1025 int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1026 int16_t *coeffs = (int16_t*)lc->edge_emu_buffer2;
1027 int size = 1 << log2_trafo_size_c;
1029 uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1030 ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1031 for (i = 0; i < (size * size); i++) {
1032 coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1034 s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1038 if (lc->tu.cross_pf) {
1039 hls_cross_component_pred(s, 1);
1041 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1042 if (lc->cu.pred_mode == MODE_INTRA) {
1043 ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1044 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1047 ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1048 log2_trafo_size_c, scan_idx_c, 2);
1050 if (lc->tu.cross_pf) {
1051 ptrdiff_t stride = s->frame->linesize[2];
1052 int hshift = s->ps.sps->hshift[2];
1053 int vshift = s->ps.sps->vshift[2];
1054 int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1055 int16_t *coeffs = (int16_t*)lc->edge_emu_buffer2;
1056 int size = 1 << log2_trafo_size_c;
1058 uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1059 ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1060 for (i = 0; i < (size * size); i++) {
1061 coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1063 s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1066 } else if (s->ps.sps->chroma_format_idc && blk_idx == 3) {
1067 int trafo_size_h = 1 << (log2_trafo_size + 1);
1068 int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1069 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1070 if (lc->cu.pred_mode == MODE_INTRA) {
1071 ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1072 trafo_size_h, trafo_size_v);
1073 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1076 ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1077 log2_trafo_size, scan_idx_c, 1);
1079 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1080 if (lc->cu.pred_mode == MODE_INTRA) {
1081 ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1082 trafo_size_h, trafo_size_v);
1083 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1086 ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1087 log2_trafo_size, scan_idx_c, 2);
1090 } else if (s->ps.sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
1091 if (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3) {
1092 int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1093 int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1094 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1095 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1096 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1097 if (s->ps.sps->chroma_format_idc == 2) {
1098 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1099 trafo_size_h, trafo_size_v);
1100 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1101 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1103 } else if (blk_idx == 3) {
1104 int trafo_size_h = 1 << (log2_trafo_size + 1);
1105 int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1106 ff_hevc_set_neighbour_available(s, xBase, yBase,
1107 trafo_size_h, trafo_size_v);
1108 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1109 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1110 if (s->ps.sps->chroma_format_idc == 2) {
1111 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1112 trafo_size_h, trafo_size_v);
1113 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1114 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
1122 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1124 int cb_size = 1 << log2_cb_size;
1125 int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1127 int min_pu_width = s->ps.sps->min_pu_width;
1128 int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1129 int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1132 for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1133 for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1134 s->is_pcm[i + j * min_pu_width] = 2;
/*
 * Recursively parse one transform tree (HEVC spec 7.3.8.8) rooted at
 * (x0, y0) with size 1 << log2_trafo_size, splitting into four quadrants
 * while split_transform_flag is set, and decoding one transform unit at
 * each leaf.  Returns 0 on success, a negative error code on failure.
 *
 * NOTE(review): this listing is a sampled extract — blank lines, closing
 * braces, `else` lines and several declarations (the cbf_cb/cbf_cr arrays,
 * ret, cbf_luma, i, j) from the original file are not shown here.
 */
1137 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1138 int xBase, int yBase, int cb_xBase, int cb_yBase,
1139 int log2_cb_size, int log2_trafo_size,
1140 int trafo_depth, int blk_idx,
1141 const int *base_cbf_cb, const int *base_cbf_cr)
1143 HEVCLocalContext *lc = s->HEVClc;
1144 uint8_t split_transform_flag;
/* Work on local copies of the parent's chroma coded-block flags; each
 * recursion level may override them from the bitstream below. */
1149 cbf_cb[0] = base_cbf_cb[0];
1150 cbf_cb[1] = base_cbf_cb[1];
1151 cbf_cr[0] = base_cbf_cr[0];
1152 cbf_cr[1] = base_cbf_cr[1];
/* For intra NxN CUs the per-PU intra modes become active at depth 1;
 * chroma gets a per-block mode only in 4:4:4 (chroma_format_idc == 3),
 * otherwise the single mode of PU 0 applies. */
1154 if (lc->cu.intra_split_flag) {
1155 if (trafo_depth == 1) {
1156 lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1157 if (s->ps.sps->chroma_format_idc == 3) {
1158 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1159 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[blk_idx];
1161 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1162 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0];
1166 lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[0];
1167 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1168 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0];
/* split_transform_flag is explicitly coded only when the size is within
 * [min_tb, max_trafo] bounds, depth allows it, and this is not the forced
 * first split of an intra NxN CU; otherwise it is inferred below. */
1171 if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1172 log2_trafo_size > s->ps.sps->log2_min_tb_size &&
1173 trafo_depth < lc->cu.max_trafo_depth &&
1174 !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1175 split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
/* Inferred split: inter CUs with non-2Nx2N partitions force a split at
 * depth 0 when max_transform_hierarchy_depth_inter == 0. */
1177 int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1178 lc->cu.pred_mode == MODE_INTER &&
1179 lc->cu.part_mode != PART_2Nx2N &&
1182 split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1183 (lc->cu.intra_split_flag && trafo_depth == 0) ||
/* Chroma cbf flags exist for this node unless chroma is 4:2:0/4:2:2 at
 * 4x4 luma size (then they are handled at the parent); 4:2:2 carries a
 * second flag per component for the lower half-TU. */
1187 if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1188 if (trafo_depth == 0 || cbf_cb[0]) {
1189 cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1190 if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1191 cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1195 if (trafo_depth == 0 || cbf_cr[0]) {
1196 cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1197 if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1198 cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
/* Split node: recurse into the four half-size quadrants in z-order. */
1203 if (split_transform_flag) {
1204 const int trafo_size_split = 1 << (log2_trafo_size - 1);
1205 const int x1 = x0 + trafo_size_split;
1206 const int y1 = y0 + trafo_size_split;
1208 #define SUBDIVIDE(x, y, idx) \
1210 ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1211 log2_trafo_size - 1, trafo_depth + 1, idx, \
1217 SUBDIVIDE(x0, y0, 0);
1218 SUBDIVIDE(x1, y0, 1);
1219 SUBDIVIDE(x0, y1, 2);
1220 SUBDIVIDE(x1, y1, 3);
/* Leaf node: decode cbf_luma (coded only when it cannot be inferred as 1)
 * and then the transform unit itself. */
1224 int min_tu_size = 1 << s->ps.sps->log2_min_tb_size;
1225 int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1226 int min_tu_width = s->ps.sps->min_tb_width;
1229 if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1230 cbf_cb[0] || cbf_cr[0] ||
1231 (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1232 cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1235 ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1236 log2_cb_size, log2_trafo_size,
1237 blk_idx, cbf_luma, cbf_cb, cbf_cr);
1240 // TODO: store cbf_luma somewhere else
/* Mark every min-TB cell of this TU in the cbf_luma map (used later by
 * the deblocking filter). */
1243 for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1244 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1245 int x_tu = (x0 + j) >> log2_min_tu_size;
1246 int y_tu = (y0 + i) >> log2_min_tu_size;
1247 s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
/* Pre-compute deblocking boundary strengths for this TU, and remember
 * transquant-bypass blocks so deblocking can special-case them. */
1250 if (!s->sh.disable_deblocking_filter_flag) {
1251 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1252 if (s->ps.pps->transquant_bypass_enable_flag &&
1253 lc->cu.cu_transquant_bypass_flag)
1254 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
/*
 * Decode a pcm_sample() coding block: read the raw PCM luma and chroma
 * samples straight from the bitstream (bypassing prediction/transform)
 * and write them into the current frame.  Returns 0 on success or a
 * negative error code (e.g. from init_get_bits).
 *
 * NOTE(review): blank lines, braces and the GetBitContext/ret declarations
 * of the original file are not shown in this sampled listing.
 */
1260 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1262 HEVCLocalContext *lc = s->HEVClc;
1264 int cb_size = 1 << log2_cb_size;
/* Destination pointers into the three frame planes, with chroma position
 * scaled by the per-plane subsampling shifts and pixel_shift (bytes per
 * sample minus one, as a shift). */
1265 int stride0 = s->frame->linesize[0];
1266 uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1267 int stride1 = s->frame->linesize[1];
1268 uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1269 int stride2 = s->frame->linesize[2];
1270 uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
/* Total PCM payload in bits: full-resolution luma plus subsampled chroma,
 * each at its own PCM bit depth; skip_bytes advances the CABAC reader past
 * the byte-aligned payload and returns a pointer to it. */
1272 int length = cb_size * cb_size * s->ps.sps->pcm.bit_depth +
1273 (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) +
1274 ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) *
1275 s->ps.sps->pcm.bit_depth_chroma;
1276 const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1279 if (!s->sh.disable_deblocking_filter_flag)
1280 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1282 ret = init_get_bits(&gb, pcm, length);
/* Copy the raw samples plane by plane via the DSP helper; chroma planes
 * only exist when chroma_format_idc is non-zero (not monochrome). */
1286 s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size, &gb, s->ps.sps->pcm.bit_depth);
1287 if (s->ps.sps->chroma_format_idc) {
1288 s->hevcdsp.put_pcm(dst1, stride1,
1289 cb_size >> s->ps.sps->hshift[1],
1290 cb_size >> s->ps.sps->vshift[1],
1291 &gb, s->ps.sps->pcm.bit_depth_chroma);
1292 s->hevcdsp.put_pcm(dst2, stride2,
1293 cb_size >> s->ps.sps->hshift[2],
1294 cb_size >> s->ps.sps->vshift[2],
1295 &gb, s->ps.sps->pcm.bit_depth_chroma);
1302 * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1304 * @param s HEVC decoding context
1305 * @param dst target buffer for block data at block position
1306 * @param dststride stride of the dst buffer
1307 * @param ref reference picture buffer at origin (0, 0)
1308 * @param mv motion vector (relative to block position) to get pixel data from
1309 * @param x_off horizontal position of block from origin (0, 0)
1310 * @param y_off vertical position of block from origin (0, 0)
1311 * @param block_w width of block
1312 * @param block_h height of block
1313 * @param luma_weight weighting factor applied to the luma prediction
1314 * @param luma_offset additive offset applied to the luma prediction value
1317 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1318 AVFrame *ref, const Mv *mv, int x_off, int y_off,
1319 int block_w, int block_h, int luma_weight, int luma_offset)
1321 HEVCLocalContext *lc = s->HEVClc;
1322 uint8_t *src = ref->data[0];
1323 ptrdiff_t srcstride = ref->linesize[0];
1324 int pic_width = s->ps.sps->width;
1325 int pic_height = s->ps.sps->height;
/* weight_flag selects between the plain and the weighted-prediction qpel
 * DSP entry points below (the if/else around the two calls is among the
 * lines missing from this sampled listing, as are the mx/my declarations
 * for the fractional quarter-pel MV components). */
1328 int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1329 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1330 int idx = ff_hevc_pel_weight[block_w];
/* Apply the integer-pel part of the MV (quarter-pel units, hence >> 2)
 * to the block position and the source pointer. */
1332 x_off += mv->x >> 2;
1333 y_off += mv->y >> 2;
1334 src += y_off * srcstride + x_off * (1 << s->ps.sps->pixel_shift);
/* If the interpolation window (block plus QPEL filter margins) reaches
 * outside the picture, build a padded copy via emulated_edge_mc and
 * interpolate from that instead.
 * NOTE(review): the y lower bound tests QPEL_EXTRA_AFTER where the x bound
 * uses QPEL_EXTRA_BEFORE — apparently conservative (triggers emulation a
 * little more often); confirm against upstream before changing. */
1336 if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1337 x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1338 y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1339 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1340 int offset = QPEL_EXTRA_BEFORE * srcstride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1341 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1343 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1344 edge_emu_stride, srcstride,
1345 block_w + QPEL_EXTRA,
1346 block_h + QPEL_EXTRA,
1347 x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1348 pic_width, pic_height);
1349 src = lc->edge_emu_buffer + buf_offset;
1350 srcstride = edge_emu_stride;
/* Unweighted vs. weighted unidirectional quarter-pel interpolation. */
1354 s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1355 block_h, mx, my, block_w);
1357 s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1358 block_h, s->sh.luma_log2_weight_denom,
1359 luma_weight, luma_offset, mx, my, block_w);
1363 * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1365 * @param s HEVC decoding context
1366 * @param dst target buffer for block data at block position
1367 * @param dststride stride of the dst buffer
1368 * @param ref0 reference picture0 buffer at origin (0, 0)
1369 * @param mv0 motion vector0 (relative to block position) to get pixel data from
1370 * @param x_off horizontal position of block from origin (0, 0)
1371 * @param y_off vertical position of block from origin (0, 0)
1372 * @param block_w width of block
1373 * @param block_h height of block
1374 * @param ref1 reference picture1 buffer at origin (0, 0)
1375 * @param mv1 motion vector1 (relative to block position) to get pixel data from
1376 * @param current_mv current motion vector structure
1378 static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1379 AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1380 int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1382 HEVCLocalContext *lc = s->HEVClc;
1383 ptrdiff_t src0stride = ref0->linesize[0];
1384 ptrdiff_t src1stride = ref1->linesize[0];
1385 int pic_width = s->ps.sps->width;
1386 int pic_height = s->ps.sps->height;
/* Fractional (quarter-pel) and integer parts of both motion vectors. */
1387 int mx0 = mv0->x & 3;
1388 int my0 = mv0->y & 3;
1389 int mx1 = mv1->x & 3;
1390 int my1 = mv1->y & 3;
1391 int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1392 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1393 int x_off0 = x_off + (mv0->x >> 2);
1394 int y_off0 = y_off + (mv0->y >> 2);
1395 int x_off1 = x_off + (mv1->x >> 2);
1396 int y_off1 = y_off + (mv1->y >> 2);
1397 int idx = ff_hevc_pel_weight[block_w];
/* Unsigned shift then cast avoids UB on a negative x_off << pixel_shift. */
1399 uint8_t *src0 = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1400 uint8_t *src1 = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
/* Edge emulation for the list-0 source when the interpolation window
 * (block + QPEL margins) leaves the picture. */
1402 if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1403 x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1404 y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1405 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1406 int offset = QPEL_EXTRA_BEFORE * src0stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1407 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1409 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1410 edge_emu_stride, src0stride,
1411 block_w + QPEL_EXTRA,
1412 block_h + QPEL_EXTRA,
1413 x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1414 pic_width, pic_height);
1415 src0 = lc->edge_emu_buffer + buf_offset;
1416 src0stride = edge_emu_stride;
/* Same for the list-1 source, using the second emulation buffer so both
 * padded copies can coexist. */
1419 if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1420 x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1421 y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1422 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1423 int offset = QPEL_EXTRA_BEFORE * src1stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1424 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1426 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1427 edge_emu_stride, src1stride,
1428 block_w + QPEL_EXTRA,
1429 block_h + QPEL_EXTRA,
1430 x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1431 pic_width, pic_height);
1432 src1 = lc->edge_emu_buffer2 + buf_offset;
1433 src1stride = edge_emu_stride;
/* First pass: interpolate the list-0 prediction into lc->tmp; second pass
 * combines it with the list-1 interpolation, either averaged (put_..._bi)
 * or with explicit weights (put_..._bi_w) depending on weight_flag (the
 * selecting if/else lines are missing from this sampled listing). */
1436 s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
1437 block_h, mx0, my0, block_w);
1439 s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1440 block_h, mx1, my1, block_w);
1442 s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1443 block_h, s->sh.luma_log2_weight_denom,
1444 s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1445 s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1446 s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1447 s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1453 * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1455 * @param s HEVC decoding context
1456 * @param dst0 target buffer for block data at block position (one chroma plane)
1457 * @param dststride stride of the dst0 buffer
1458 * @param src0 source buffer of the reference chroma plane at origin (0, 0)
1459 * @param srcstride stride of the src0 buffer
1460 * @param reflist reference list index (0 = L0, 1 = L1) selecting the motion vector in current_mv
1461 * @param x_off horizontal position of block from origin (0, 0)
1462 * @param y_off vertical position of block from origin (0, 0)
1463 * @param block_w width of block
1464 * @param block_h height of block
1465 * @param current_mv current motion vector structure
1466 * @param chroma_weight weighting factor applied to the chroma prediction
1467 * @param chroma_offset additive offset applied to the chroma prediction value
/* NOTE(review): the token "¤t_mv" below is mis-encoded text — the
 * original source almost certainly reads "&current_mv" ("&curren" was
 * HTML-entity-decoded to '¤').  The same artifact recurs throughout this
 * extract wherever current_mv is passed by address. */
1469 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1470 ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1471 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1473 HEVCLocalContext *lc = s->HEVClc;
/* Picture dimensions in chroma samples (luma size >> subsampling shift). */
1474 int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1];
1475 int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1];
1476 const Mv *mv = ¤t_mv->mv[reflist];
1477 int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1478 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1479 int idx = ff_hevc_pel_weight[block_w];
1480 int hshift = s->ps.sps->hshift[1];
1481 int vshift = s->ps.sps->vshift[1];
/* Fractional MV part in chroma units (eighth-pel for 4:2:0), then scaled
 * to the 1/8-pel convention the epel DSP functions expect. */
1482 intptr_t mx = av_mod_uintp2(mv->x, 2 + hshift);
1483 intptr_t my = av_mod_uintp2(mv->y, 2 + vshift);
1484 intptr_t _mx = mx << (1 - hshift);
1485 intptr_t _my = my << (1 - vshift);
/* Integer MV part, applied to the block position and source pointer. */
1487 x_off += mv->x >> (2 + hshift);
1488 y_off += mv->y >> (2 + vshift);
1489 src0 += y_off * srcstride + x_off * (1 << s->ps.sps->pixel_shift);
/* Edge emulation when the EPEL interpolation window leaves the picture. */
1491 if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1492 x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1493 y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1494 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1495 int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->ps.sps->pixel_shift));
1496 int buf_offset0 = EPEL_EXTRA_BEFORE *
1497 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1498 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1499 edge_emu_stride, srcstride,
1500 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1501 x_off - EPEL_EXTRA_BEFORE,
1502 y_off - EPEL_EXTRA_BEFORE,
1503 pic_width, pic_height);
1505 src0 = lc->edge_emu_buffer + buf_offset0;
1506 srcstride = edge_emu_stride;
/* Unweighted vs. weighted unidirectional chroma interpolation (the
 * if (!weight_flag)/else lines are missing from this sampled listing). */
1509 s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1510 block_h, _mx, _my, block_w);
1512 s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1513 block_h, s->sh.chroma_log2_weight_denom,
1514 chroma_weight, chroma_offset, _mx, _my, block_w);
1518 * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1520 * @param s HEVC decoding context
1521 * @param dst0 target buffer for block data at block position
1522 * @param dststride stride of the dst0 buffer
1523 * @param ref0 reference picture0 buffer at origin (0, 0)
1524 * @param ref1 reference picture1 buffer at origin (0, 0)
1525 * @param x_off horizontal position of block from origin (0, 0)
1526 * @param y_off vertical position of block from origin (0, 0)
1527 * @param block_w width of block
1528 * @param block_h height of block
1530 * @param current_mv current motion vector structure (supplies both list-0 and list-1 motion vectors and reference indices)
1532 * @param cidx chroma component index (0 = Cb, 1 = Cr)
/* NOTE(review): "¤t_mv" below is mis-encoded "&current_mv"
 * (HTML entity "&curren" decoded to '¤') — same artifact as elsewhere
 * in this extract. */
1534 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1535 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1537 HEVCLocalContext *lc = s->HEVClc;
/* Chroma plane cidx+1 (1 = Cb, 2 = Cr) of both reference frames. */
1538 uint8_t *src1 = ref0->data[cidx+1];
1539 uint8_t *src2 = ref1->data[cidx+1];
1540 ptrdiff_t src1stride = ref0->linesize[cidx+1];
1541 ptrdiff_t src2stride = ref1->linesize[cidx+1];
1542 int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1543 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1544 int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1];
1545 int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1];
1546 Mv *mv0 = ¤t_mv->mv[0];
1547 Mv *mv1 = ¤t_mv->mv[1];
1548 int hshift = s->ps.sps->hshift[1];
1549 int vshift = s->ps.sps->vshift[1];
/* Fractional chroma MV parts for both lists, scaled to the 1/8-pel
 * convention of the epel DSP functions. */
1551 intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift);
1552 intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift);
1553 intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift);
1554 intptr_t my1 = av_mod_uintp2(mv1->y, 2 + vshift);
1555 intptr_t _mx0 = mx0 << (1 - hshift);
1556 intptr_t _my0 = my0 << (1 - vshift);
1557 intptr_t _mx1 = mx1 << (1 - hshift);
1558 intptr_t _my1 = my1 << (1 - vshift);
/* Integer chroma MV parts applied to the block position. */
1560 int x_off0 = x_off + (mv0->x >> (2 + hshift));
1561 int y_off0 = y_off + (mv0->y >> (2 + vshift));
1562 int x_off1 = x_off + (mv1->x >> (2 + hshift));
1563 int y_off1 = y_off + (mv1->y >> (2 + vshift));
1564 int idx = ff_hevc_pel_weight[block_w];
1565 src1 += y_off0 * src1stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1566 src2 += y_off1 * src2stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
/* Edge emulation for the list-0 chroma source. */
1568 if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1569 x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1570 y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1571 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1572 int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1573 int buf_offset1 = EPEL_EXTRA_BEFORE *
1574 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1576 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1577 edge_emu_stride, src1stride,
1578 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1579 x_off0 - EPEL_EXTRA_BEFORE,
1580 y_off0 - EPEL_EXTRA_BEFORE,
1581 pic_width, pic_height);
1583 src1 = lc->edge_emu_buffer + buf_offset1;
1584 src1stride = edge_emu_stride;
/* Edge emulation for the list-1 chroma source, in the second buffer. */
1587 if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1588 x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1589 y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1590 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1591 int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1592 int buf_offset1 = EPEL_EXTRA_BEFORE *
1593 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1595 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1596 edge_emu_stride, src2stride,
1597 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1598 x_off1 - EPEL_EXTRA_BEFORE,
1599 y_off1 - EPEL_EXTRA_BEFORE,
1600 pic_width, pic_height);
1602 src2 = lc->edge_emu_buffer2 + buf_offset1;
1603 src2stride = edge_emu_stride;
/* List-0 interpolation into lc->tmp, then combined with list-1: averaged
 * (put_..._bi) or explicitly weighted (put_..._bi_w) depending on
 * weight_flag (the selecting if/else lines are missing from this listing). */
1606 s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1607 block_h, _mx0, _my0, block_w);
1609 s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1610 src2, src2stride, lc->tmp,
1611 block_h, _mx1, _my1, block_w);
1613 s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1614 src2, src2stride, lc->tmp,
1616 s->sh.chroma_log2_weight_denom,
1617 s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1618 s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1619 s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1620 s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1621 _mx1, _my1, block_w);
1624 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1625 const Mv *mv, int y0, int height)
1627 int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1629 if (s->threads_type == FF_THREAD_FRAME )
1630 ff_thread_await_progress(&ref->tf, y, 0);
/*
 * Parse the explicit (AMVP, non-merge) motion data for a prediction unit:
 * inter_pred_idc, per-list reference indices, MVDs and MVP flags, then
 * derive the final MV for each active list as predictor + difference.
 * Results are written into *mv (ref_idx, pred_flag, mv[0]/mv[1]).
 *
 * NOTE(review): braces, blank lines and the `int mvp_flag;` declaration
 * are missing from this sampled listing.
 */
1633 static void hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
1634 int nPbH, int log2_cb_size, int part_idx,
1635 int merge_idx, MvField *mv)
1637 HEVCLocalContext *lc = s->HEVClc;
/* inter_pred_idc is only coded in B slices; P slices are always list 0. */
1638 enum InterPredIdc inter_pred_idc = PRED_L0;
1641 ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1643 if (s->sh.slice_type == B_SLICE)
1644 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
/* List 0 (runs for PRED_L0 and PRED_BI): ref_idx is coded only when the
 * list has more than the implicit single entry. */
1646 if (inter_pred_idc != PRED_L1) {
1647 if (s->sh.nb_refs[L0])
1648 mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1650 mv->pred_flag = PF_L0;
1651 ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1652 mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1653 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1654 part_idx, merge_idx, mv, mvp_flag, 0);
1655 mv->mv[0].x += lc->pu.mvd.x;
1656 mv->mv[0].y += lc->pu.mvd.y;
/* List 1 (runs for PRED_L1 and PRED_BI): with mvd_l1_zero_flag set on a
 * bi-predicted PU the list-1 MVD is inferred as zero instead of parsed. */
1659 if (inter_pred_idc != PRED_L0) {
1660 if (s->sh.nb_refs[L1])
1661 mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1663 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1664 AV_ZERO32(&lc->pu.mvd);
1666 ff_hevc_hls_mvd_coding(s, x0, y0, 1);
/* += so that PF_L0 + PF_L1 yields PF_BI for bi-predicted PUs. */
1669 mv->pred_flag += PF_L1;
1670 mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1671 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1672 part_idx, merge_idx, mv, mvp_flag, 1);
1673 mv->mv[1].x += lc->pu.mvd.x;
1674 mv->mv[1].y += lc->pu.mvd.y;
/*
 * Parse and reconstruct one inter prediction unit: decode its motion data
 * (merge/skip or explicit AMVP), store the resulting MvField over the PU's
 * min-PU grid cells, wait for reference-frame rows under frame threading,
 * and run luma + chroma motion compensation (uni- or bi-directional).
 *
 * NOTE(review): "¤t_mv" is mis-encoded "&current_mv" throughout
 * (HTML entity damage); braces, blank lines and several declarations
 * (merge_idx, x_pu/y_pu, i, j, nPbW/nPbH parameters on the trimmed line
 * 1679) are missing from this sampled listing.
 */
1678 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1680 int log2_cb_size, int partIdx, int idx)
/* POS: address of sample (x, y) in plane c_idx, honouring per-plane
 * subsampling shifts and pixel_shift. */
1682 #define POS(c_idx, x, y) \
1683 &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1684 (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1685 HEVCLocalContext *lc = s->HEVClc;
1687 struct MvField current_mv = {{{ 0 }}};
1689 int min_pu_width = s->ps.sps->min_pu_width;
1691 MvField *tab_mvf = s->ref->tab_mvf;
1692 RefPicList *refPicList = s->ref->refPicList;
1693 HEVCFrame *ref0 = NULL, *ref1 = NULL;
1694 uint8_t *dst0 = POS(0, x0, y0);
1695 uint8_t *dst1 = POS(1, x0, y0);
1696 uint8_t *dst2 = POS(2, x0, y0);
1697 int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1698 int min_cb_width = s->ps.sps->min_cb_width;
1699 int x_cb = x0 >> log2_min_cb_size;
1700 int y_cb = y0 >> log2_min_cb_size;
1704 int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
/* Motion data: skip CUs are implicitly merge; otherwise merge_flag is
 * coded, and non-merge PUs take the explicit AMVP path. */
1707 lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1709 if (skip_flag || lc->pu.merge_flag) {
1710 if (s->sh.max_num_merge_cand > 1)
1711 merge_idx = ff_hevc_merge_idx_decode(s);
1715 ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1716 partIdx, merge_idx, ¤t_mv);
1718 hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1719 partIdx, merge_idx, ¤t_mv);
/* Replicate the final motion field into every min-PU cell of this PU so
 * later prediction/deblocking can look it up per position. */
1722 x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1723 y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1725 for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1726 for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1727 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
/* Resolve reference frames for each active list and (under frame
 * threading) wait until the rows this PU reads are decoded. */
1729 if (current_mv.pred_flag & PF_L0) {
1730 ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1733 hevc_await_progress(s, ref0, ¤t_mv.mv[0], y0, nPbH);
1735 if (current_mv.pred_flag & PF_L1) {
1736 ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1739 hevc_await_progress(s, ref1, ¤t_mv.mv[1], y0, nPbH);
/* Motion compensation proper: one branch per prediction kind (L0-only,
 * L1-only, bi-directional), luma first, then both chroma planes when the
 * stream is not monochrome. */
1742 if (current_mv.pred_flag == PF_L0) {
1743 int x0_c = x0 >> s->ps.sps->hshift[1];
1744 int y0_c = y0 >> s->ps.sps->vshift[1];
1745 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1746 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1748 luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1749 ¤t_mv.mv[0], x0, y0, nPbW, nPbH,
1750 s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1751 s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1753 if (s->ps.sps->chroma_format_idc) {
1754 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1755 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1756 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1757 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1758 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1759 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1761 } else if (current_mv.pred_flag == PF_L1) {
1762 int x0_c = x0 >> s->ps.sps->hshift[1];
1763 int y0_c = y0 >> s->ps.sps->vshift[1];
1764 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1765 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1767 luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1768 ¤t_mv.mv[1], x0, y0, nPbW, nPbH,
1769 s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1770 s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1772 if (s->ps.sps->chroma_format_idc) {
1773 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1774 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1775 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1777 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1778 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv,
1779 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1781 } else if (current_mv.pred_flag == PF_BI) {
1782 int x0_c = x0 >> s->ps.sps->hshift[1];
1783 int y0_c = y0 >> s->ps.sps->vshift[1];
1784 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1785 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1787 luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1788 ¤t_mv.mv[0], x0, y0, nPbW, nPbH,
1789 ref1->frame, ¤t_mv.mv[1], ¤t_mv);
1791 if (s->ps.sps->chroma_format_idc) {
1792 chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1793 x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 0);
1795 chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1796 x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 1);
/*
 * Derive the luma intra prediction mode for a PU (HEVC spec 8.4.2): build
 * the three most-probable-mode candidates from the left and above
 * neighbours, select via mpm_idx or reconstruct from
 * rem_intra_luma_pred_mode, then record the mode and PF_INTRA over the
 * PU's min-PU grid cells.  Returns the derived mode (0..34).
 *
 * NOTE(review): braces, blank lines and a few statements are missing from
 * this sampled listing — e.g. the candidate[3] declaration, the `else`
 * lines, and the body of the CTB-boundary reset at original line 1828/1829
 * (presumably `cand_up = INTRA_DC;` — confirm against upstream).
 */
1804 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1805 int prev_intra_luma_pred_flag)
1807 HEVCLocalContext *lc = s->HEVClc;
1808 int x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1809 int y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1810 int min_pu_width = s->ps.sps->min_pu_width;
1811 int size_in_pus = pu_size >> s->ps.sps->log2_min_pu_size;
/* Position within the current CTB; zero means we sit on a CTB edge and
 * the neighbour (if any) lies in another CTB. */
1812 int x0b = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
1813 int y0b = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
/* Neighbour modes default to INTRA_DC when the neighbour is unavailable. */
1815 int cand_up = (lc->ctb_up_flag || y0b) ?
1816 s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1817 int cand_left = (lc->ctb_left_flag || x0b) ?
1818 s->tab_ipm[y_pu * min_pu_width + x_pu - 1] : INTRA_DC;
1820 int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1822 MvField *tab_mvf = s->ref->tab_mvf;
1823 int intra_pred_mode;
1827 // intra_pred_mode prediction does not cross vertical CTB boundaries
1828 if ((y0 - 1) < y_ctb)
/* MPM candidate construction: equal neighbours yield either the fixed
 * {PLANAR, DC, ANGULAR_26} set (for non-angular modes < 2) or the mode
 * plus its two angular neighbours (mod-32 wrap over modes 2..33). */
1831 if (cand_left == cand_up) {
1832 if (cand_left < 2) {
1833 candidate[0] = INTRA_PLANAR;
1834 candidate[1] = INTRA_DC;
1835 candidate[2] = INTRA_ANGULAR_26;
1837 candidate[0] = cand_left;
1838 candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1839 candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1842 candidate[0] = cand_left;
1843 candidate[1] = cand_up;
1844 if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1845 candidate[2] = INTRA_PLANAR;
1846 } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1847 candidate[2] = INTRA_DC;
1849 candidate[2] = INTRA_ANGULAR_26;
/* Either pick the signalled MPM directly, or sort the candidates and
 * remap rem_intra_luma_pred_mode past each candidate <= it. */
1853 if (prev_intra_luma_pred_flag) {
1854 intra_pred_mode = candidate[lc->pu.mpm_idx];
1856 if (candidate[0] > candidate[1])
1857 FFSWAP(uint8_t, candidate[0], candidate[1]);
1858 if (candidate[0] > candidate[2])
1859 FFSWAP(uint8_t, candidate[0], candidate[2]);
1860 if (candidate[1] > candidate[2])
1861 FFSWAP(uint8_t, candidate[1], candidate[2]);
1863 intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1864 for (i = 0; i < 3; i++)
1865 if (intra_pred_mode >= candidate[i])
1869 /* write the intra prediction units into the mv array */
1872 for (i = 0; i < size_in_pus; i++) {
1873 memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1874 intra_pred_mode, size_in_pus);
1876 for (j = 0; j < size_in_pus; j++) {
1877 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
1881 return intra_pred_mode;
/* Record the coding-tree depth 'ct_depth' for every min-CB-sized cell covered
 * by the coding block at (x0, y0) of size (1 << log2_cb_size). tab_ct_depth
 * is a per-picture grid with one byte per minimum coding block. */
1884 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1885 int log2_cb_size, int ct_depth)
/* width/height of the CB measured in minimum coding blocks */
1887 int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
/* top-left of the CB in min-CB grid coordinates */
1888 int x_cb = x0 >> s->ps.sps->log2_min_cb_size;
1889 int y_cb = y0 >> s->ps.sps->log2_min_cb_size;
/* fill one row of the grid per iteration; the memset value/length
 * arguments continue on the following (elided) source line */
1892 for (y = 0; y < length; y++)
1893 memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
/* Chroma intra mode remapping table for 4:2:2 video: indexed by the derived
 * luma-based mode (0..34), yields the mode actually used for chroma.
 * Consumed in intra_prediction_unit() when chroma_format_idc == 2. */
1897 static const uint8_t tab_mode_idx[] = {
1898 0, 1, 2, 2, 2, 2, 3, 5, 7, 8, 10, 12, 13, 15, 17, 18, 19, 20,
1899 21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31};
/* Parse intra prediction modes for a CU at (x0, y0).
 * For PART_NxN the CU is split into a 2x2 grid of prediction blocks
 * (side == 2, pb_size halved); otherwise a single block is used.
 * Luma: a prev_intra_luma_pred_flag per PB selects MPM-index vs. remainder
 * coding; luma_intra_pred_mode() then derives the final mode.
 * Chroma: mode 4 means "same as luma (DM)"; otherwise intra_chroma_table
 * maps the coded index, with 34 substituted when it would collide with
 * the luma mode. The 4:2:2 case additionally remaps via tab_mode_idx. */
1901 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1904 HEVCLocalContext *lc = s->HEVClc;
/* candidate chroma modes: planar(0), angular-26, angular-10, DC(1) */
1905 static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1906 uint8_t prev_intra_luma_pred_flag[4];
1907 int split = lc->cu.part_mode == PART_NxN;
1908 int pb_size = (1 << log2_cb_size) >> split;
1909 int side = split + 1;
/* first pass: read all prev_intra_luma_pred_flag bits (they are coded
 * together, before any mpm_idx/rem values) */
1913 for (i = 0; i < side; i++)
1914 for (j = 0; j < side; j++)
1915 prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
/* second pass: read mpm_idx or rem_intra_luma_pred_mode and derive the
 * luma mode for each prediction block */
1917 for (i = 0; i < side; i++) {
1918 for (j = 0; j < side; j++) {
1919 if (prev_intra_luma_pred_flag[2 * i + j])
1920 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1922 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1924 lc->pu.intra_pred_mode[2 * i + j] =
1925 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1926 prev_intra_luma_pred_flag[2 * i + j]);
/* 4:4:4: one chroma mode per prediction block */
1930 if (s->ps.sps->chroma_format_idc == 3) {
1931 for (i = 0; i < side; i++) {
1932 for (j = 0; j < side; j++) {
1933 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1934 if (chroma_mode != 4) {
/* table entry equals the luma mode: use angular-34 instead */
1935 if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
1936 lc->pu.intra_pred_mode_c[2 * i + j] = 34;
1938 lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
/* chroma_mode == 4: derived-from-luma (DM) mode */
1940 lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
/* 4:2:2: single chroma mode, remapped through tab_mode_idx */
1944 } else if (s->ps.sps->chroma_format_idc == 2) {
1946 lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1947 if (chroma_mode != 4) {
1948 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1951 mode_idx = intra_chroma_table[chroma_mode];
1953 mode_idx = lc->pu.intra_pred_mode[0];
1955 lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
/* 4:2:0 (any non-monochrome format other than the two above) */
1956 } else if (s->ps.sps->chroma_format_idc != 0) {
1957 chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1958 if (chroma_mode != 4) {
1959 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1960 lc->pu.intra_pred_mode_c[0] = 34;
1962 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
1964 lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
/* Initialise default intra state for a CU that carries no coded intra
 * syntax (skip / inter / PCM): fill tab_ipm with INTRA_DC over the CU's
 * footprint, and, for intra CUs, mark every covered minimum prediction
 * unit as PF_INTRA in the motion-vector field array. */
1969 static void intra_prediction_unit_default_value(HEVCContext *s,
1973 HEVCLocalContext *lc = s->HEVClc;
1974 int pb_size = 1 << log2_cb_size;
/* CU size measured in minimum prediction units */
1975 int size_in_pus = pb_size >> s->ps.sps->log2_min_pu_size;
1976 int min_pu_width = s->ps.sps->min_pu_width;
1977 MvField *tab_mvf = s->ref->tab_mvf;
1978 int x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1979 int y_pu = y0 >> s->ps.sps->log2_min_pu_size;
/* CU smaller than one min PU: nothing to record */
1982 if (size_in_pus == 0)
1984 for (j = 0; j < size_in_pus; j++)
1985 memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
1986 if (lc->cu.pred_mode == MODE_INTRA)
1987 for (j = 0; j < size_in_pus; j++)
1988 for (k = 0; k < size_in_pus; k++)
1989 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
/* Parse and decode one coding unit at (x0, y0), size 1 << log2_cb_size.
 * Implements the coding_unit() syntax: skip flag, prediction mode,
 * partition mode, PCM, intra/inter prediction units, transform tree,
 * deblocking boundary strengths and QP bookkeeping.
 * Returns 0 on success or a negative error code. */
1992 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
1994 int cb_size = 1 << log2_cb_size;
1995 HEVCLocalContext *lc = s->HEVClc;
1996 int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
/* CU size in minimum coding blocks, and top-left in min-CB coords */
1997 int length = cb_size >> log2_min_cb_size;
1998 int min_cb_width = s->ps.sps->min_cb_width;
1999 int x_cb = x0 >> log2_min_cb_size;
2000 int y_cb = y0 >> log2_min_cb_size;
/* idx selects size-dependent DSP function tables in hls_prediction_unit */
2001 int idx = log2_cb_size - 2;
/* mask of the QP-group size derived from diff_cu_qp_delta_depth */
2002 int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
/* defaults before any syntax is parsed */
2007 lc->cu.pred_mode = MODE_INTRA;
2008 lc->cu.part_mode = PART_2Nx2N;
2009 lc->cu.intra_split_flag = 0;
2011 SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2012 for (x = 0; x < 4; x++)
2013 lc->pu.intra_pred_mode[x] = 1;
2014 if (s->ps.pps->transquant_bypass_enable_flag) {
2015 lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
/* bypassed CUs must not be deblocked */
2016 if (lc->cu.cu_transquant_bypass_flag)
2017 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2019 lc->cu.cu_transquant_bypass_flag = 0;
/* cu_skip_flag is only present outside I slices; propagate it over the
 * CU's footprint in the skip_flag grid */
2021 if (s->sh.slice_type != I_SLICE) {
2022 uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2024 x = y_cb * min_cb_width + x_cb;
2025 for (y = 0; y < length; y++) {
2026 memset(&s->skip_flag[x], skip_flag, length);
2029 lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2031 x = y_cb * min_cb_width + x_cb;
2032 for (y = 0; y < length; y++) {
2033 memset(&s->skip_flag[x], 0, length);
/* skipped CU: a single merge-mode PU, no residual */
2038 if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2039 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2040 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2042 if (!s->sh.disable_deblocking_filter_flag)
2043 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2047 if (s->sh.slice_type != I_SLICE)
2048 lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
/* part_mode is coded for inter CUs and for intra CUs at minimum size */
2049 if (lc->cu.pred_mode != MODE_INTRA ||
2050 log2_cb_size == s->ps.sps->log2_min_cb_size) {
2051 lc->cu.part_mode = ff_hevc_part_mode_decode(s, log2_cb_size);
2052 lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2053 lc->cu.pred_mode == MODE_INTRA;
2056 if (lc->cu.pred_mode == MODE_INTRA) {
/* PCM is only allowed for 2Nx2N within the SPS-signalled size range */
2057 if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2058 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2059 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2060 pcm_flag = ff_hevc_pcm_flag_decode(s);
2063 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2064 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2065 if (s->ps.sps->pcm.loop_filter_disable_flag)
2066 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2071 intra_prediction_unit(s, x0, y0, log2_cb_size);
2074 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
/* inter CU: emit one hls_prediction_unit call per partition; the
 * case labels (PART_2Nx2N, PART_2NxN, ...) are on elided lines */
2075 switch (lc->cu.part_mode) {
2077 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2080 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 2, log2_cb_size, 0, idx);
2081 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2084 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2085 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2088 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 4, log2_cb_size, 0, idx);
2089 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2092 hls_prediction_unit(s, x0, y0, cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2093 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size / 4, log2_cb_size, 1, idx);
2096 hls_prediction_unit(s, x0, y0, cb_size / 4, cb_size, log2_cb_size, 0, idx - 2);
2097 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2100 hls_prediction_unit(s, x0, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2101 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size / 4, cb_size, log2_cb_size, 1, idx - 2);
2104 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2105 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2106 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2107 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
/* residual: rqt_root_cbf defaults to 1; for non-intra, non-merge-2Nx2N
 * CUs it is explicitly coded */
2113 int rqt_root_cbf = 1;
2115 if (lc->cu.pred_mode != MODE_INTRA &&
2116 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2117 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
/* note: 'const static' is legal but unconventional ordering */
2120 const static int cbf[2] = { 0 };
2121 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2122 s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2123 s->ps.sps->max_transform_hierarchy_depth_inter;
2124 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2126 log2_cb_size, 0, 0, cbf, cbf);
2130 if (!s->sh.disable_deblocking_filter_flag)
2131 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
/* no cu_qp_delta was coded for this QP group: derive qPy now */
2136 if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2137 ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
/* propagate the final luma QP over the CU footprint */
2139 x = y_cb * min_cb_width + x_cb;
2140 for (y = 0; y < length; y++) {
2141 memset(&s->qp_y_tab[x], lc->qp_y, length);
/* at the end of a QP group, update the QP predictor */
2145 if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2146 ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2147 lc->qPy_pred = lc->qp_y;
2150 set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
/* Recursively parse the coding quadtree rooted at (x0, y0).
 * Splits while split_cu_flag is set (or implicitly when the block
 * crosses the picture border), then decodes leaf CUs via
 * hls_coding_unit(). Returns >0 if more CTB data follows, 0 at
 * end of slice, or a negative error code. */
2155 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2156 int log2_cb_size, int cb_depth)
2158 HEVCLocalContext *lc = s->HEVClc;
2159 const int cb_size = 1 << log2_cb_size;
2163 lc->ct_depth = cb_depth;
/* split_cu_flag is only coded when the block lies fully inside the
 * picture and is larger than the minimum CB size ... */
2164 if (x0 + cb_size <= s->ps.sps->width &&
2165 y0 + cb_size <= s->ps.sps->height &&
2166 log2_cb_size > s->ps.sps->log2_min_cb_size) {
2167 split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
/* ... otherwise splitting is forced for any block above min size */
2169 split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
/* entering a new QP group: reset delta-QP state */
2171 if (s->ps.pps->cu_qp_delta_enabled_flag &&
2172 log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2173 lc->tu.is_cu_qp_delta_coded = 0;
2174 lc->tu.cu_qp_delta = 0;
2177 if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2178 log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_chroma_qp_offset_depth) {
2179 lc->tu.is_cu_chroma_qp_offset_coded = 0;
2183 int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2184 const int cb_size_split = cb_size >> 1;
2185 const int x1 = x0 + cb_size_split;
2186 const int y1 = y0 + cb_size_split;
/* recurse into the four sub-blocks; sub-blocks outside the picture
 * are skipped. A negative/zero more_data short-circuits recursion */
2190 more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2194 if (more_data && x1 < s->ps.sps->width) {
2195 more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2199 if (more_data && y1 < s->ps.sps->height) {
2200 more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2204 if (more_data && x1 < s->ps.sps->width &&
2205 y1 < s->ps.sps->height) {
2206 more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
/* leaving a QP group: refresh the predictor */
2211 if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2212 ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2213 lc->qPy_pred = lc->qp_y;
/* more data follows unless this subtree reached the picture edge */
2216 return ((x1 + cb_size_split) < s->ps.sps->width ||
2217 (y1 + cb_size_split) < s->ps.sps->height);
/* leaf: decode the CU, then check end_of_slice at the last CTB column
 * and row of the picture */
2221 ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2224 if ((!((x0 + cb_size) %
2225 (1 << (s->ps.sps->log2_ctb_size))) ||
2226 (x0 + cb_size >= s->ps.sps->width)) &&
2228 (1 << (s->ps.sps->log2_ctb_size))) ||
2229 (y0 + cb_size >= s->ps.sps->height))) {
2230 int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2231 return !end_of_slice_flag;
/* Set up per-CTB neighbourhood state before decoding the CTB at
 * (x_ctb, y_ctb): slice address bookkeeping, tile/WPP row extents,
 * and availability flags for the left/up/up-left/up-right neighbours
 * used by intra prediction and CABAC context derivation. */
2240 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2243 HEVCLocalContext *lc = s->HEVClc;
2244 int ctb_size = 1 << s->ps.sps->log2_ctb_size;
2245 int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
/* CTB address relative to the start of the current slice; <= 0 means
 * this is the first CTB of the slice */
2246 int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2248 s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
/* WPP: each CTB row starts a fresh QP group */
2250 if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2251 if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2252 lc->first_qp_group = 1;
2253 lc->end_of_tiles_x = s->ps.sps->width;
2254 } else if (s->ps.pps->tiles_enabled_flag) {
/* crossing into a new tile: recompute its right edge */
2255 if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2256 int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2257 lc->end_of_tiles_x = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2258 lc->first_qp_group = 1;
2261 lc->end_of_tiles_x = s->ps.sps->width;
2264 lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
/* classify the left/upper CTB boundaries as tile and/or slice edges */
2266 lc->boundary_flags = 0;
2267 if (s->ps.pps->tiles_enabled_flag) {
2268 if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2269 lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2270 if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2271 lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2272 if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2273 lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2274 if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2275 lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
/* no tiles: only slice boundaries are possible */
2277 if (ctb_addr_in_slice <= 0)
2278 lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2279 if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2280 lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
/* neighbour availability: must exist in the picture, in this slice,
 * and not be separated by a tile boundary */
2283 lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2284 lc->ctb_up_flag = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2285 lc->ctb_up_right_flag = ((y_ctb > 0) && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2286 lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0) && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
/* Single-threaded slice decoding entry point (run via avctx->execute).
 * Walks the slice's CTBs in tile-scan order: initialises neighbour
 * state and CABAC per CTB, parses SAO parameters and the coding
 * quadtree, then runs the in-loop filters. The return value (elided
 * here) is the final CTB address in tile-scan order. */
2289 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2291 HEVCContext *s = avctxt->priv_data;
2292 int ctb_size = 1 << s->ps.sps->log2_ctb_size;
2296 int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
/* a dependent slice segment cannot be the first segment of a picture */
2298 if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2299 av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2300 return AVERROR_INVALIDDATA;
/* the previous segment must belong to the same slice */
2303 if (s->sh.dependent_slice_segment_flag) {
2304 int prev_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2305 if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2306 av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2307 return AVERROR_INVALIDDATA;
2311 while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2312 int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
/* convert raster-scan CTB address to pixel coordinates */
2314 x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2315 y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2316 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2318 ff_hevc_cabac_init(s, ctb_addr_ts);
2320 hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
/* per-CTB deblocking parameters from the slice header */
2322 s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2323 s->deblock[ctb_addr_rs].tc_offset = s->sh.tc_offset;
2324 s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag;
2326 more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
/* parse error: invalidate this CTB's slice address and bail out */
2327 if (more_data < 0) {
2328 s->tab_slice_address[ctb_addr_rs] = -1;
2334 ff_hevc_save_states(s, ctb_addr_ts);
2335 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
/* last CTB of the picture: run the trailing filter pass */
2338 if (x_ctb + ctb_size >= s->ps.sps->width &&
2339 y_ctb + ctb_size >= s->ps.sps->height)
2340 ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
/* Decode the current slice without wavefront parallelism by running
 * hls_decode_entry once through avctx->execute. The ret/arg setup and
 * final return are on elided lines. */
2345 static int hls_slice_data(HEVCContext *s)
2353 s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
/* Wavefront (WPP) worker: decodes one CTB row of the slice.
 * Each job gets its own HEVCContext copy (s1->sList[self_id]); rows
 * synchronise through ff_thread_await_progress2/report_progress2 so a
 * row stays at least SHIFT_CTB_WPP CTBs behind the row above it, and
 * through the shared s1->wpp_err abort flag. */
2356 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2358 HEVCContext *s1 = avctxt->priv_data, *s;
2359 HEVCLocalContext *lc;
2360 int ctb_size = 1<< s1->ps.sps->log2_ctb_size;
2362 int *ctb_row_p = input_ctb_row;
2363 int ctb_row = ctb_row_p[job];
2364 int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->ps.sps->width + ctb_size - 1) >> s1->ps.sps->log2_ctb_size);
2365 int ctb_addr_ts = s1->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2366 int thread = ctb_row % s1->threads_number;
2369 s = s1->sList[self_id];
/* rows after the first start at their slice-header entry point */
2373 ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2377 ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2380 while(more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2381 int x_ctb = (ctb_addr_rs % s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2382 int y_ctb = (ctb_addr_rs / s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2384 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
/* wait until the row above is far enough ahead */
2386 ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
/* another row hit an error: release waiters and abort this row */
2388 if (avpriv_atomic_int_get(&s1->wpp_err)){
2389 ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2393 ff_hevc_cabac_init(s, ctb_addr_ts);
2394 hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2395 more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2397 if (more_data < 0) {
2398 s->tab_slice_address[ctb_addr_rs] = -1;
2404 ff_hevc_save_states(s, ctb_addr_ts);
/* let the row below advance by one CTB */
2405 ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2406 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
/* premature end-of-row: flag a wavefront error for all rows */
2408 if (!more_data && (x_ctb+ctb_size) < s->ps.sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2409 avpriv_atomic_int_set(&s1->wpp_err, 1);
2410 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
/* bottom-right CTB of the picture: trailing filter pass */
2414 if ((x_ctb+ctb_size) >= s->ps.sps->width && (y_ctb+ctb_size) >= s->ps.sps->height ) {
2415 ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2416 ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2419 ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
/* row complete once the next CTB would start past the right edge */
2422 if(x_ctb >= s->ps.sps->width) {
2426 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
/* Decode the current slice with wavefront parallel processing.
 * Computes per-row substream offsets/sizes from the slice header's
 * entry points (adjusting for emulation-prevention bytes that were
 * stripped from the RBSP), clones the decoder context for each worker
 * thread, and dispatches hls_decode_entry_wpp via avctx->execute2. */
2431 static int hls_slice_data_wpp(HEVCContext *s, const HEVCNAL *nal)
2433 const uint8_t *data = nal->data;
2434 int length = nal->size;
2435 HEVCLocalContext *lc = s->HEVClc;
2436 int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2437 int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
/* cmpt counts skipped (emulation-prevention) bytes inside a substream */
2439 int startheader, cmpt = 0;
2445 return AVERROR(ENOMEM);
2450 ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
/* first call with this thread count: allocate per-thread contexts */
2453 for (i = 1; i < s->threads_number; i++) {
2454 s->sList[i] = av_malloc(sizeof(HEVCContext));
2455 memcpy(s->sList[i], s, sizeof(HEVCContext));
2456 s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2457 s->sList[i]->HEVClc = s->HEVClcList[i];
/* byte position after the slice header in the RBSP */
2461 offset = (lc->gb.index >> 3);
2463 for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) {
2464 if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
/* translate each entry point into an (offset, size) pair, subtracting
 * the emulation-prevention bytes that fall inside the substream */
2470 for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2471 offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2472 for (j = 0, cmpt = 0, startheader = offset
2473 + s->sh.entry_point_offset[i]; j < nal->skipped_bytes; j++) {
2474 if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2479 s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2480 s->sh.offset[i - 1] = offset;
/* the last substream runs to the end of the NAL unit */
2483 if (s->sh.num_entry_point_offsets != 0) {
2484 offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2485 s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2486 s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
/* refresh the worker contexts from the main context for this slice */
2491 for (i = 1; i < s->threads_number; i++) {
2492 s->sList[i]->HEVClc->first_qp_group = 1;
2493 s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2494 memcpy(s->sList[i], s, sizeof(HEVCContext));
2495 s->sList[i]->HEVClc = s->HEVClcList[i];
2498 avpriv_atomic_int_set(&s->wpp_err, 0);
2499 ff_reset_entries(s->avctx);
2501 for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2506 if (s->ps.pps->entropy_coding_sync_enabled_flag)
2507 s->avctx->execute2(s->avctx, (void *) hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2509 for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
/* Attach SEI-derived side data to the output frame: stereo 3D packing
 * from the frame-packing-arrangement SEI and a display matrix from the
 * display-orientation SEI. Returns 0 or AVERROR(ENOMEM). */
2516 static int set_side_data(HEVCContext *s)
2518 AVFrame *out = s->ref->frame;
/* types 3..5 (side-by-side, top-bottom, frame sequence) are the only
 * packing arrangements representable as AVStereo3D here */
2520 if (s->sei_frame_packing_present &&
2521 s->frame_packing_arrangement_type >= 3 &&
2522 s->frame_packing_arrangement_type <= 5 &&
2523 s->content_interpretation_type > 0 &&
2524 s->content_interpretation_type < 3) {
2525 AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2527 return AVERROR(ENOMEM);
2529 switch (s->frame_packing_arrangement_type) {
2531 if (s->quincunx_subsampling)
2532 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2534 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2537 stereo->type = AV_STEREO3D_TOPBOTTOM;
2540 stereo->type = AV_STEREO3D_FRAMESEQUENCE;
/* content_interpretation_type 2 means right view first */
2544 if (s->content_interpretation_type == 2)
2545 stereo->flags = AV_STEREO3D_FLAG_INVERT;
2548 if (s->sei_display_orientation_present &&
2549 (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
/* SEI rotation is in units of 2^-16 turns; convert to degrees */
2550 double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2551 AVFrameSideData *rotation = av_frame_new_side_data(out,
2552 AV_FRAME_DATA_DISPLAYMATRIX,
2553 sizeof(int32_t) * 9);
2555 return AVERROR(ENOMEM);
2557 av_display_rotation_set((int32_t *)rotation->data, angle);
2558 av_display_matrix_flip((int32_t *)rotation->data,
2559 s->sei_hflip, s->sei_vflip);
/* Per-picture initialisation, called for the first slice of a frame:
 * clears the per-picture decode tables, acquires a new reference frame,
 * builds the frame RPS, attaches side data and bumps/outputs frames
 * from the DPB. On failure the new reference is released (fail path,
 * elided label). */
2565 static int hevc_frame_start(HEVCContext *s)
2567 HEVCLocalContext *lc = s->HEVClc;
2568 int pic_size_in_ctb = ((s->ps.sps->width >> s->ps.sps->log2_min_cb_size) + 1) *
2569 ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
/* reset deblocking strengths, CBF, PCM and slice-address tables */
2572 memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2573 memset(s->vertical_bs, 0, s->bs_width * s->bs_height);
2574 memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2575 memset(s->is_pcm, 0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
2576 memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
/* remember the first VCL NAL type to verify later slices match */
2579 s->first_nal_type = s->nal_unit_type;
2581 if (s->ps.pps->tiles_enabled_flag)
2582 lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2584 ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2588 ret = ff_hevc_frame_rps(s);
2590 av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2594 s->ref->frame->key_frame = IS_IRAP(s);
2596 ret = set_side_data(s);
/* map HEVC slice type (B=0,P=1,I=2) to AVPictureType */
2600 s->frame->pict_type = 3 - s->sh.slice_type;
2603 ff_hevc_bump_frame(s);
2605 av_frame_unref(s->output_frame);
2606 ret = ff_hevc_output_frame(s, s->output_frame, 0);
/* hwaccel setup may still change the frame, so delay the
 * frame-threading handoff in that case */
2610 if (!s->avctx->hwaccel)
2611 ff_thread_finish_setup(s->avctx);
/* failure path: drop the newly acquired reference frame */
2617 ff_hevc_unref_frame(s, s->ref, ~0);
/* Decode a single NAL unit: dispatch on nal->type to the parameter-set
 * and SEI parsers, or, for VCL NAL units, parse the slice header,
 * start the frame if needed, and decode the slice data (optionally
 * through a hardware accelerator or the WPP path).
 * Returns 0 on success; errors are fatal only with AV_EF_EXPLODE. */
2622 static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
2624 HEVCLocalContext *lc = s->HEVClc;
2625 GetBitContext *gb = &lc->gb;
2626 int ctb_addr_ts, ret;
2629 s->nal_unit_type = nal->type;
2630 s->temporal_id = nal->temporal_id;
2632 switch (s->nal_unit_type) {
/* parameter sets (the case labels are on elided lines) */
2634 ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2639 ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2640 s->apply_defdispwin);
2645 ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2649 case NAL_SEI_PREFIX:
2650 case NAL_SEI_SUFFIX:
2651 ret = ff_hevc_decode_nal_sei(s);
/* VCL NAL unit types fall through to shared slice handling */
2662 case NAL_BLA_W_RADL:
2664 case NAL_IDR_W_RADL:
2671 ret = hls_slice_header(s);
/* random-access handling: after a seek (max_ra == INT_MAX), wait for
 * a CRA/BLA picture and drop leading RASL pictures */
2675 if (s->max_ra == INT_MAX) {
2676 if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2680 s->max_ra = INT_MIN;
2684 if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2685 s->poc <= s->max_ra) {
2689 if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2690 s->max_ra = INT_MIN;
2693 if (s->sh.first_slice_in_pic_flag) {
2694 ret = hevc_frame_start(s);
2697 } else if (!s->ref) {
2698 av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
/* all VCL NALUs of one picture must share the same type */
2702 if (s->nal_unit_type != s->first_nal_type) {
2703 av_log(s->avctx, AV_LOG_ERROR,
2704 "Non-matching NAL types of the VCL NALUs: %d %d\n",
2705 s->first_nal_type, s->nal_unit_type);
2706 return AVERROR_INVALIDDATA;
2709 if (!s->sh.dependent_slice_segment_flag &&
2710 s->sh.slice_type != I_SLICE) {
2711 ret = ff_hevc_slice_rpl(s);
2713 av_log(s->avctx, AV_LOG_WARNING,
2714 "Error constructing the reference lists for the current slice.\n");
2719 if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2720 ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2725 if (s->avctx->hwaccel) {
2726 ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
/* software path: WPP when multiple threads and entry points exist */
2730 if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2731 ctb_addr_ts = hls_slice_data_wpp(s, nal);
2733 ctb_addr_ts = hls_slice_data(s);
/* picture fully decoded when the last CTB was reached */
2734 if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
2738 if (ctb_addr_ts < 0) {
/* EOS/EOB (case labels elided): start a new sequence */
2746 s->seq_decode = (s->seq_decode + 1) & 0xff;
2747 s->max_ra = INT_MAX;
2753 av_log(s->avctx, AV_LOG_INFO,
2754 "Skipping NAL unit %d\n", s->nal_unit_type);
2759 if (s->avctx->err_recognition & AV_EF_EXPLODE)
/* Split an input packet into NAL units and decode them in order.
 * A NALU error is only fatal with AV_EF_EXPLODE (handled in the elided
 * 'fail' path); otherwise decoding continues with the next NALU. */
2764 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2769 s->last_eos = s->eos;
2772 /* split the input packet into NAL units, so we know the upper bound on the
2773 * number of slices in the frame */
2774 ret = ff_hevc_split_packet(s, &s->pkt, buf, length, s->avctx, s->is_nalff,
2775 s->nal_length_size);
2777 av_log(s->avctx, AV_LOG_ERROR,
2778 "Error splitting the input into NAL units.\n");
/* pre-scan for end-of-sequence / end-of-bitstream markers */
2782 for (i = 0; i < s->pkt.nb_nals; i++) {
2783 if (s->pkt.nals[i].type == NAL_EOB_NUT ||
2784 s->pkt.nals[i].type == NAL_EOS_NUT)
2788 /* decode the NAL units */
2789 for (i = 0; i < s->pkt.nb_nals; i++) {
2790 ret = decode_nal_unit(s, &s->pkt.nals[i]);
2792 av_log(s->avctx, AV_LOG_WARNING,
2793 "Error parsing NAL unit #%d.\n", i);
/* frame threading: unblock any consumer waiting on this picture */
2799 if (s->ref && s->threads_type == FF_THREAD_FRAME)
2800 ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
/* Log a 16-byte MD5 digest as 32 lowercase hex characters, without a
 * trailing newline (callers append separators themselves). */
2805 static void print_md5(void *log_ctx, int level, uint8_t md5[16])
2808 for (i = 0; i < 16; i++)
2809 av_log(log_ctx, level, "%02"PRIx8, md5[i]);
/* Verify the decoded frame against the per-plane MD5 checksums carried
 * in the picture-hash SEI (stored in s->md5). Returns 0 when all
 * planes match, AVERROR_INVALIDDATA on mismatch, or another negative
 * error code. */
2812 static int verify_md5(HEVCContext *s, AVFrame *frame)
2814 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2819 return AVERROR(EINVAL);
/* >8-bit formats use two bytes per sample */
2821 pixel_shift = desc->comp[0].depth_minus1 > 7;
2823 av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2826 /* the checksums are LE, so we have to byteswap for >8bpp formats
2829 if (pixel_shift && !s->checksum_buf) {
/* scratch buffer large enough for the widest plane line */
2830 av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2831 FFMAX3(frame->linesize[0], frame->linesize[1],
2832 frame->linesize[2]));
2833 if (!s->checksum_buf)
2834 return AVERROR(ENOMEM);
/* hash each plane line by line (strides may exceed the visible width) */
2838 for (i = 0; frame->data[i]; i++) {
2839 int width = s->avctx->coded_width;
2840 int height = s->avctx->coded_height;
2841 int w = (i == 1 || i == 2) ? (width >> desc->log2_chroma_w) : width;
2842 int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2845 av_md5_init(s->md5_ctx);
2846 for (j = 0; j < h; j++) {
2847 const uint8_t *src = frame->data[i] + j * frame->linesize[i];
/* byteswap into the scratch buffer so the hash sees LE samples */
2850 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2851 (const uint16_t *) src, w);
2852 src = s->checksum_buf;
2855 av_md5_update(s->md5_ctx, src, w << pixel_shift);
2857 av_md5_final(s->md5_ctx, md5);
2859 if (!memcmp(md5, s->md5[i], 16)) {
2860 av_log (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2861 print_md5(s->avctx, AV_LOG_DEBUG, md5);
2862 av_log (s->avctx, AV_LOG_DEBUG, "; ");
2864 av_log (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2865 print_md5(s->avctx, AV_LOG_ERROR, md5);
2866 av_log (s->avctx, AV_LOG_ERROR, " != ");
2867 print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2868 av_log (s->avctx, AV_LOG_ERROR, "\n");
2869 return AVERROR_INVALIDDATA;
2873 av_log(s->avctx, AV_LOG_DEBUG, "\n");
/* AVCodec.decode() entry point. An empty packet flushes buffered
 * pictures from the DPB; otherwise the packet's NAL units are decoded,
 * the SEI checksum optionally verified, and a finished frame (if any)
 * is returned through *data / *got_output. */
2878 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2882 HEVCContext *s = avctx->priv_data;
/* flush path: drain one buffered output frame per call */
2885 ret = ff_hevc_output_frame(s, data, 1);
2894 ret = decode_nal_units(s, avpkt->data, avpkt->size);
2898 if (avctx->hwaccel) {
/* best-effort: a hwaccel end_frame failure is logged, not fatal */
2899 if (s->ref && avctx->hwaccel->end_frame(avctx) < 0)
2900 av_log(avctx, AV_LOG_ERROR,
2901 "hardware accelerator failed to decode picture\n");
2903 /* verify the SEI checksum */
2904 if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2906 ret = verify_md5(s, s->ref->frame);
2907 if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2908 ff_hevc_unref_frame(s, s->ref, ~0);
2915 if (s->is_decoded) {
2916 av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
/* hand over the frame queued for output, if any */
2920 if (s->output_frame->buf[0]) {
2921 av_frame_move_ref(data, s->output_frame);
/* Create a new reference to 'src' in 'dst' for frame threading: the
 * frame buffers and per-frame metadata buffers are ref-counted, not
 * copied. On any allocation failure dst is fully unreferenced and
 * AVERROR(ENOMEM) returned. */
2928 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2932 ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2936 dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2937 if (!dst->tab_mvf_buf)
2939 dst->tab_mvf = src->tab_mvf;
2941 dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2942 if (!dst->rpl_tab_buf)
2944 dst->rpl_tab = src->rpl_tab;
2946 dst->rpl_buf = av_buffer_ref(src->rpl_buf);
/* plain-value metadata is copied directly */
2950 dst->poc = src->poc;
2951 dst->ctb_count = src->ctb_count;
2952 dst->window = src->window;
2953 dst->flags = src->flags;
2954 dst->sequence = src->sequence;
2956 if (src->hwaccel_picture_private) {
2957 dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
2958 if (!dst->hwaccel_priv_buf)
2960 dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
/* fail path: release any references taken so far */
2965 ff_hevc_unref_frame(s, dst, ~0);
2966 return AVERROR(ENOMEM);
/* AVCodec.close(): release everything owned by the decoder context —
 * scratch buffers, DPB frames, parameter-set lists, per-thread
 * contexts and the parsed-NAL buffers. */
2969 static av_cold int hevc_decode_free(AVCodecContext *avctx)
2971 HEVCContext *s = avctx->priv_data;
2976 av_freep(&s->md5_ctx);
2978 av_freep(&s->cabac_state);
2980 for (i = 0; i < 3; i++) {
2981 av_freep(&s->sao_pixel_buffer_h[i]);
2982 av_freep(&s->sao_pixel_buffer_v[i]);
2984 av_frame_free(&s->output_frame);
/* unref then free every DPB slot */
2986 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
2987 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
2988 av_frame_free(&s->DPB[i].frame);
2991 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++)
2992 av_buffer_unref(&s->ps.vps_list[i]);
2993 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++)
2994 av_buffer_unref(&s->ps.sps_list[i]);
2995 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++)
2996 av_buffer_unref(&s->ps.pps_list[i]);
3001 av_freep(&s->sh.entry_point_offset);
3002 av_freep(&s->sh.offset);
3003 av_freep(&s->sh.size);
/* worker-thread contexts allocated by hls_slice_data_wpp() */
3005 for (i = 1; i < s->threads_number; i++) {
3006 HEVCLocalContext *lc = s->HEVClcList[i];
3008 av_freep(&s->HEVClcList[i]);
3009 av_freep(&s->sList[i]);
/* HEVClc aliases HEVClcList[0]; avoid a dangling pointer after free */
3012 if (s->HEVClc == s->HEVClcList[0])
3014 av_freep(&s->HEVClcList[0]);
3016 for (i = 0; i < s->pkt.nals_allocated; i++) {
3017 av_freep(&s->pkt.nals[i].rbsp_buffer);
3018 av_freep(&s->pkt.nals[i].skipped_bytes_pos);
3020 av_freep(&s->pkt.nals);
3021 s->pkt.nals_allocated = 0;
/* Allocate the long-lived pieces of the decoder context: the local
 * context, CABAC state, output frame, DPB frames and MD5 context.
 * On any failure everything is torn down via hevc_decode_free() and
 * AVERROR(ENOMEM) is returned (fail path at the bottom). */
3026 static av_cold int hevc_init_context(AVCodecContext *avctx)
3028 HEVCContext *s = avctx->priv_data;
3033 s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3036 s->HEVClcList[0] = s->HEVClc;
3039 s->cabac_state = av_malloc(HEVC_CONTEXTS);
3040 if (!s->cabac_state)
3043 s->output_frame = av_frame_alloc();
3044 if (!s->output_frame)
3047 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3048 s->DPB[i].frame = av_frame_alloc();
3049 if (!s->DPB[i].frame)
3051 s->DPB[i].tf.f = s->DPB[i].frame;
/* INT_MAX: no recovery point seen yet (see decode_nal_unit) */
3054 s->max_ra = INT_MAX;
3056 s->md5_ctx = av_md5_alloc();
3060 ff_bswapdsp_init(&s->bdsp);
3062 s->context_initialized = 1;
3068 hevc_decode_free(avctx);
3069 return AVERROR(ENOMEM);
/* Frame-threading: synchronise this worker's context with the source
 * context s0 — re-reference DPB frames and parameter-set buffers, and
 * copy the scalar decoding state. */
3072 static int hevc_update_thread_context(AVCodecContext *dst,
3073 const AVCodecContext *src)
3075 HEVCContext *s = dst->priv_data;
3076 HEVCContext *s0 = src->priv_data;
3079 if (!s->context_initialized) {
3080 ret = hevc_init_context(dst);
/* mirror the source DPB: drop our refs, re-reference active frames */
3085 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3086 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3087 if (s0->DPB[i].frame->buf[0]) {
3088 ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3094 if (s->ps.sps != s0->ps.sps)
/* re-reference all parameter-set buffers from the source context */
3096 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
3097 av_buffer_unref(&s->ps.vps_list[i]);
3098 if (s0->ps.vps_list[i]) {
3099 s->ps.vps_list[i] = av_buffer_ref(s0->ps.vps_list[i]);
3100 if (!s->ps.vps_list[i])
3101 return AVERROR(ENOMEM);
3105 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3106 av_buffer_unref(&s->ps.sps_list[i]);
3107 if (s0->ps.sps_list[i]) {
3108 s->ps.sps_list[i] = av_buffer_ref(s0->ps.sps_list[i]);
3109 if (!s->ps.sps_list[i])
3110 return AVERROR(ENOMEM);
3114 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
3115 av_buffer_unref(&s->ps.pps_list[i]);
3116 if (s0->ps.pps_list[i]) {
3117 s->ps.pps_list[i] = av_buffer_ref(s0->ps.pps_list[i]);
3118 if (!s->ps.pps_list[i])
3119 return AVERROR(ENOMEM);
/* the active SPS changed: reinitialise size-dependent state */
3123 if (s->ps.sps != s0->ps.sps)
3124 if ((ret = set_sps(s, s0->ps.sps, src->pix_fmt)) < 0)
/* scalar decoding state */
3127 s->seq_decode = s0->seq_decode;
3128 s->seq_output = s0->seq_output;
3129 s->pocTid0 = s0->pocTid0;
3130 s->max_ra = s0->max_ra;
3133 s->is_nalff = s0->is_nalff;
3134 s->nal_length_size = s0->nal_length_size;
3136 s->threads_number = s0->threads_number;
3137 s->threads_type = s0->threads_type;
/* new sequence in the source (condition elided): reset RA state */
3140 s->seq_decode = (s->seq_decode + 1) & 0xff;
3141 s->max_ra = INT_MAX;
/* Parse codec extradata: either an hvcC configuration record (MP4-style)
 * or raw Annex-B NAL units, feeding the contained parameter sets through
 * decode_nal_units().  Afterwards, stream-level parameters are exported
 * from the first SPS found.
 * NOTE(review): some error-handling lines and closing braces are elided
 * in this excerpt. */
3147 static int hevc_decode_extradata(HEVCContext *s)
3149     AVCodecContext *avctx = s->avctx;
3153     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
/* Heuristic for hvcC: Annex-B extradata starts with a 00 00 0x start-code
 * prefix, so a nonzero first/second byte or a third byte > 1 means hvcC. */
3155     if (avctx->extradata_size > 3 &&
3156         (avctx->extradata[0] || avctx->extradata[1] ||
3157          avctx->extradata[2] > 1)) {
3158         /* It seems the extradata is encoded as hvcC format.
3159          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3160          * is finalized. When finalized, configurationVersion will be 1 and we
3161          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3162         int i, j, num_arrays, nal_len_size;
/* Skip the 21-byte fixed part of the HEVCDecoderConfigurationRecord,
 * then read lengthSizeMinusOne (low 2 bits) and the array count. */
3166         bytestream2_skip(&gb, 21);
3167         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3168         num_arrays   = bytestream2_get_byte(&gb);
3170         /* nal units in the hvcC always have length coded with 2 bytes,
3171          * so put a fake nal_length_size = 2 while parsing them */
3172         s->nal_length_size = 2;
3174         /* Decode nal units from hvcC. */
3175         for (i = 0; i < num_arrays; i++) {
3176             int type = bytestream2_get_byte(&gb) & 0x3f;
3177             int cnt  = bytestream2_get_be16(&gb);
3179             for (j = 0; j < cnt; j++) {
3180                 // +2 for the nal size field
3181                 int nalsize = bytestream2_peek_be16(&gb) + 2;
/* Reject truncated extradata before handing the buffer to the parser. */
3182                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3183                     av_log(s->avctx, AV_LOG_ERROR,
3184                            "Invalid NAL unit size in extradata.\n");
3185                     return AVERROR_INVALIDDATA;
3188                 ret = decode_nal_units(s, gb.buffer, nalsize);
3190                     av_log(avctx, AV_LOG_ERROR,
3191                            "Decoding nal unit %d %d from hvcC failed\n",
3195                 bytestream2_skip(&gb, nalsize);
3199         /* Now store right nal length size, that will be used to parse
/* Restore the real NAL length-field size for the packet bitstream. */
3201         s->nal_length_size = nal_len_size;
/* Annex-B path: the whole extradata is a plain NAL unit stream. */
3204         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3209     /* export stream parameters from the first SPS */
3210     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3211         if (s->ps.sps_list[i]) {
3212             const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
3213             export_stream_params(s->avctx, &s->ps, sps);
/* Codec init callback: set up CABAC lookup tables, allocate the decoder
 * context, pick the threading configuration and parse any extradata.
 * NOTE(review): 'else' keywords and error-check lines between the visible
 * statements are elided in this excerpt. */
3221 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3223     HEVCContext *s = avctx->priv_data;
/* One-time global init of the shared CABAC state tables. */
3226     ff_init_cabac_states();
/* Required for ff_thread_report/await_progress() with frame threads. */
3228     avctx->internal->allocate_progress = 1;
3230     ret = hevc_init_context(avctx);
3234     s->enable_parallel_tiles = 0;
3235     s->picture_struct = 0;
/* Slice threading uses avctx->thread_count workers; otherwise run single
 * threaded (the line selecting 1 is the elided else branch). */
3237     if(avctx->active_thread_type & FF_THREAD_SLICE)
3238         s->threads_number = avctx->thread_count;
3240         s->threads_number = 1;
/* Parameter sets may arrive out-of-band; parse them now so the first
 * packet can be decoded immediately. */
3242     if (avctx->extradata_size > 0 && avctx->extradata) {
3243         ret = hevc_decode_extradata(s);
3245             hevc_decode_free(avctx);
/* Prefer frame threading when it is enabled and more than one thread is
 * available; fall back to slice threading otherwise. */
3250     if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3251             s->threads_type = FF_THREAD_FRAME;
3253             s->threads_type = FF_THREAD_SLICE;
/* Frame-threading callback: initialize a worker thread's copy of the
 * context.  The shallow memcpy made by the generic threading code is
 * wiped so hevc_init_context() can allocate fresh, unshared state. */
3258 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3260     HEVCContext *s = avctx->priv_data;
3263     memset(s, 0, sizeof(*s));
3265     ret = hevc_init_context(avctx);
/* Flush callback (e.g. on seek): drop every reference held in the DPB and
 * re-arm RASL-picture skipping until the next random access point. */
3272 static void hevc_decode_flush(AVCodecContext *avctx)
3274     HEVCContext *s = avctx->priv_data;
3275     ff_hevc_flush_dpb(s);
3276     s->max_ra = INT_MAX;
/* Helpers for the AVOption table below: field offset within HEVCContext,
 * and the common decoder/video option flags. */
3279 #define OFFSET(x) offsetof(HEVCContext, x)
3280 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
/* Profiles this decoder can report; terminated by FF_PROFILE_UNKNOWN
 * (the closing brace of the array is elided in this excerpt). */
3282 static const AVProfile profiles[] = {
3283     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3284     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3285     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3286     { FF_PROFILE_HEVC_REXT,                 "Rext"                },
3287     { FF_PROFILE_UNKNOWN },
/* User-settable decoder options.  Both entries write the same field
 * (apply_defdispwin), so either flag enables cropping to the VUI default
 * display window.  The NULL terminator of the table is elided here. */
3290 static const AVOption options[] = {
3291     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3292         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3293     { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3294         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
/* AVClass tying the options table above to the decoder's private context
 * (the .option member and closing brace are elided in this excerpt). */
3298 static const AVClass hevc_decoder_class = {
3299     .class_name = "HEVC decoder",
3300     .item_name  = av_default_item_name,
3302     .version    = LIBAVUTIL_VERSION_INT,
3305 AVCodec ff_hevc_decoder = {
3307 .long_name = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3308 .type = AVMEDIA_TYPE_VIDEO,
3309 .id = AV_CODEC_ID_HEVC,
3310 .priv_data_size = sizeof(HEVCContext),
3311 .priv_class = &hevc_decoder_class,
3312 .init = hevc_decode_init,
3313 .close = hevc_decode_free,
3314 .decode = hevc_decode_frame,
3315 .flush = hevc_decode_flush,
3316 .update_thread_context = hevc_update_thread_context,
3317 .init_thread_copy = hevc_init_thread_copy,
3318 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3319 CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
3320 .profiles = NULL_IF_CONFIG_SMALL(profiles),