git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c

   1 /*
   2  * HEVC video Decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of FFmpeg.
  10  *
  11  * FFmpeg is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * FFmpeg is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with FFmpeg; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/atomic.h"
  27 #include "libavutil/attributes.h"
  28 #include "libavutil/common.h"
  29 #include "libavutil/display.h"
  30 #include "libavutil/internal.h"
  31 #include "libavutil/md5.h"
  32 #include "libavutil/opt.h"
  33 #include "libavutil/pixdesc.h"
  34 #include "libavutil/stereo3d.h"
  35
  36 #include "bswapdsp.h"
  37 #include "bytestream.h"
  38 #include "cabac_functions.h"
  39 #include "golomb.h"
  40 #include "hevc.h"
  41 #include "profiles.h"
  42
  /*
   * Lookup table assigning the consecutive indices 0..9 to the values
   * 2, 4, 6, 8, 12, 16, 24, 32, 48 and 64. Every slot that is not listed is
   * implicitly zero-initialized.
   * NOTE(review): presumably indexed by a block width in pixels - confirm
   * against the users of this table.
   */
  const uint8_t ff_hevc_pel_weight[65] = {
      [2]  = 0,
      [4]  = 1,
      [6]  = 2,
      [8]  = 3,
      [12] = 4,
      [16] = 5,
      [24] = 6,
      [32] = 7,
      [48] = 8,
      [64] = 9,
  };
  44
  45 /**
  46  * NOTE: Each function hls_foo corresponds to the function foo in the
  47  * specification (HLS stands for High Level Syntax).
  48  */
  49
  50 /**
  51  * Section 5.7
  52  */
  53
  54 /* free everything allocated  by pic_arrays_init() */
  55 static void pic_arrays_free(HEVCContext *s)
  56 {
  57     av_freep(&s->sao);
  58     av_freep(&s->deblock);
  59
  60     av_freep(&s->skip_flag);
  61     av_freep(&s->tab_ct_depth);
  62
  63     av_freep(&s->tab_ipm);
  64     av_freep(&s->cbf_luma);
  65     av_freep(&s->is_pcm);
  66
  67     av_freep(&s->qp_y_tab);
  68     av_freep(&s->tab_slice_address);
  69     av_freep(&s->filter_slice_edges);
  70
  71     av_freep(&s->horizontal_bs);
  72     av_freep(&s->vertical_bs);
  73
  74     av_freep(&s->sh.entry_point_offset);
  75     av_freep(&s->sh.size);
  76     av_freep(&s->sh.offset);
  77
  78     av_buffer_pool_uninit(&s->tab_mvf_pool);
  79     av_buffer_pool_uninit(&s->rpl_tab_pool);
  80 }
  81
  82 /* allocate arrays that depend on frame dimensions */
  83 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
  84 {
  85     int log2_min_cb_size = sps->log2_min_cb_size;
  86     int width            = sps->width;
  87     int height           = sps->height;
  88     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
  89                            ((height >> log2_min_cb_size) + 1);
  90     int ctb_count        = sps->ctb_width * sps->ctb_height;
  91     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
  92
  93     s->bs_width  = (width  >> 2) + 1;
  94     s->bs_height = (height >> 2) + 1;
  95
  96     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
  97     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
  98     if (!s->sao || !s->deblock)
  99         goto fail;
 100
 101     s->skip_flag    = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
 102     s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
 103     if (!s->skip_flag || !s->tab_ct_depth)
 104         goto fail;
 105
 106     s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
 107     s->tab_ipm  = av_mallocz(min_pu_size);
 108     s->is_pcm   = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
 109     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 110         goto fail;
 111
 112     s->filter_slice_edges = av_mallocz(ctb_count);
 113     s->tab_slice_address  = av_malloc_array(pic_size_in_ctb,
 114                                       sizeof(*s->tab_slice_address));
 115     s->qp_y_tab           = av_malloc_array(pic_size_in_ctb,
 116                                       sizeof(*s->qp_y_tab));
 117     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 118         goto fail;
 119
 120     s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
 121     s->vertical_bs   = av_mallocz_array(s->bs_width, s->bs_height);
 122     if (!s->horizontal_bs || !s->vertical_bs)
 123         goto fail;
 124
 125     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 126                                           av_buffer_allocz);
 127     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 128                                           av_buffer_allocz);
 129     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 130         goto fail;
 131
 132     return 0;
 133
 134 fail:
 135     pic_arrays_free(s);
 136     return AVERROR(ENOMEM);
 137 }
 138
 139 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 140 {
 141     int i = 0;
 142     int j = 0;
 143     uint8_t luma_weight_l0_flag[16];
 144     uint8_t chroma_weight_l0_flag[16];
 145     uint8_t luma_weight_l1_flag[16];
 146     uint8_t chroma_weight_l1_flag[16];
 147     int luma_log2_weight_denom;
 148
 149     luma_log2_weight_denom = get_ue_golomb_long(gb);
 150     if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7)
 151         av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
 152     s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
 153     if (s->ps.sps->chroma_format_idc != 0) {
 154         int delta = get_se_golomb(gb);
 155         s->sh.chroma_log2_weight_denom = av_clip_uintp2(s->sh.luma_log2_weight_denom + delta, 3);
 156     }
 157
 158     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 159         luma_weight_l0_flag[i] = get_bits1(gb);
 160         if (!luma_weight_l0_flag[i]) {
 161             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 162             s->sh.luma_offset_l0[i] = 0;
 163         }
 164     }
 165     if (s->ps.sps->chroma_format_idc != 0) {
 166         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 167             chroma_weight_l0_flag[i] = get_bits1(gb);
 168     } else {
 169         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 170             chroma_weight_l0_flag[i] = 0;
 171     }
 172     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 173         if (luma_weight_l0_flag[i]) {
 174             int delta_luma_weight_l0 = get_se_golomb(gb);
 175             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 176             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 177         }
 178         if (chroma_weight_l0_flag[i]) {
 179             for (j = 0; j < 2; j++) {
 180                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 181                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 182                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 183                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 184                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 185             }
 186         } else {
 187             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 188             s->sh.chroma_offset_l0[i][0] = 0;
 189             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 190             s->sh.chroma_offset_l0[i][1] = 0;
 191         }
 192     }
 193     if (s->sh.slice_type == B_SLICE) {
 194         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 195             luma_weight_l1_flag[i] = get_bits1(gb);
 196             if (!luma_weight_l1_flag[i]) {
 197                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 198                 s->sh.luma_offset_l1[i] = 0;
 199             }
 200         }
 201         if (s->ps.sps->chroma_format_idc != 0) {
 202             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 203                 chroma_weight_l1_flag[i] = get_bits1(gb);
 204         } else {
 205             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 206                 chroma_weight_l1_flag[i] = 0;
 207         }
 208         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 209             if (luma_weight_l1_flag[i]) {
 210                 int delta_luma_weight_l1 = get_se_golomb(gb);
 211                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 212                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 213             }
 214             if (chroma_weight_l1_flag[i]) {
 215                 for (j = 0; j < 2; j++) {
 216                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 217                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 218                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 219                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 220                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 221                 }
 222             } else {
 223                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 224                 s->sh.chroma_offset_l1[i][0] = 0;
 225                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 226                 s->sh.chroma_offset_l1[i][1] = 0;
 227             }
 228         }
 229     }
 230 }
 231
 232 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 233 {
 234     const HEVCSPS *sps = s->ps.sps;
 235     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 236     int prev_delta_msb = 0;
 237     unsigned int nb_sps = 0, nb_sh;
 238     int i;
 239
 240     rps->nb_refs = 0;
 241     if (!sps->long_term_ref_pics_present_flag)
 242         return 0;
 243
 244     if (sps->num_long_term_ref_pics_sps > 0)
 245         nb_sps = get_ue_golomb_long(gb);
 246     nb_sh = get_ue_golomb_long(gb);
 247
 248     if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
 249         return AVERROR_INVALIDDATA;
 250
 251     rps->nb_refs = nb_sh + nb_sps;
 252
 253     for (i = 0; i < rps->nb_refs; i++) {
 254         uint8_t delta_poc_msb_present;
 255
 256         if (i < nb_sps) {
 257             uint8_t lt_idx_sps = 0;
 258
 259             if (sps->num_long_term_ref_pics_sps > 1)
 260                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 261
 262             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 263             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 264         } else {
 265             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 266             rps->used[i] = get_bits1(gb);
 267         }
 268
 269         delta_poc_msb_present = get_bits1(gb);
 270         if (delta_poc_msb_present) {
 271             int delta = get_ue_golomb_long(gb);
 272
 273             if (i && i != nb_sps)
 274                 delta += prev_delta_msb;
 275
 276             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 277             prev_delta_msb = delta;
 278         }
 279     }
 280
 281     return 0;
 282 }
 283
 284 static void export_stream_params(AVCodecContext *avctx, const HEVCParamSets *ps,
 285                                  const HEVCSPS *sps)
 286 {
 287     const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
 288     unsigned int num = 0, den = 0;
 289
 290     avctx->pix_fmt             = sps->pix_fmt;
 291     avctx->coded_width         = sps->width;
 292     avctx->coded_height        = sps->height;
 293     avctx->width               = sps->output_width;
 294     avctx->height              = sps->output_height;
 295     avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 296     avctx->profile             = sps->ptl.general_ptl.profile_idc;
 297     avctx->level               = sps->ptl.general_ptl.level_idc;
 298
 299     ff_set_sar(avctx, sps->vui.sar);
 300
 301     if (sps->vui.video_signal_type_present_flag)
 302         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 303                                                             : AVCOL_RANGE_MPEG;
 304     else
 305         avctx->color_range = AVCOL_RANGE_MPEG;
 306
 307     if (sps->vui.colour_description_present_flag) {
 308         avctx->color_primaries = sps->vui.colour_primaries;
 309         avctx->color_trc       = sps->vui.transfer_characteristic;
 310         avctx->colorspace      = sps->vui.matrix_coeffs;
 311     } else {
 312         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 313         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 314         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 315     }
 316
 317     if (vps->vps_timing_info_present_flag) {
 318         num = vps->vps_num_units_in_tick;
 319         den = vps->vps_time_scale;
 320     } else if (sps->vui.vui_timing_info_present_flag) {
 321         num = sps->vui.vui_num_units_in_tick;
 322         den = sps->vui.vui_time_scale;
 323     }
 324
 325     if (num != 0 && den != 0)
 326         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
 327                   num, den, 1 << 30);
 328 }
 329
 330 static int set_sps(HEVCContext *s, const HEVCSPS *sps, enum AVPixelFormat pix_fmt)
 331 {
 332     #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL + CONFIG_HEVC_VAAPI_HWACCEL + CONFIG_HEVC_VDPAU_HWACCEL)
 333     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
 334     int ret, i;
 335
 336     pic_arrays_free(s);
 337     s->ps.sps = NULL;
 338     s->ps.vps = NULL;
 339
 340     if (!sps)
 341         return 0;
 342
 343     ret = pic_arrays_init(s, sps);
 344     if (ret < 0)
 345         goto fail;
 346
 347     export_stream_params(s->avctx, &s->ps, sps);
 348
 349     if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
 350 #if CONFIG_HEVC_DXVA2_HWACCEL
 351         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
 352 #endif
 353 #if CONFIG_HEVC_D3D11VA_HWACCEL
 354         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
 355 #endif
 356 #if CONFIG_HEVC_VAAPI_HWACCEL
 357         *fmt++ = AV_PIX_FMT_VAAPI;
 358 #endif
 359 #if CONFIG_HEVC_VDPAU_HWACCEL
 360         *fmt++ = AV_PIX_FMT_VDPAU;
 361 #endif
 362     }
 363
 364     if (pix_fmt == AV_PIX_FMT_NONE) {
 365         *fmt++ = sps->pix_fmt;
 366         *fmt = AV_PIX_FMT_NONE;
 367
 368         ret = ff_thread_get_format(s->avctx, pix_fmts);
 369         if (ret < 0)
 370             goto fail;
 371         s->avctx->pix_fmt = ret;
 372     }
 373     else {
 374         s->avctx->pix_fmt = pix_fmt;
 375     }
 376
 377     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 378     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 379     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 380
 381     for (i = 0; i < 3; i++) {
 382         av_freep(&s->sao_pixel_buffer_h[i]);
 383         av_freep(&s->sao_pixel_buffer_v[i]);
 384     }
 385
 386     if (sps->sao_enabled && !s->avctx->hwaccel) {
 387         int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
 388         int c_idx;
 389
 390         for(c_idx = 0; c_idx < c_count; c_idx++) {
 391             int w = sps->width >> sps->hshift[c_idx];
 392             int h = sps->height >> sps->vshift[c_idx];
 393             s->sao_pixel_buffer_h[c_idx] =
 394                 av_malloc((w * 2 * sps->ctb_height) <<
 395                           sps->pixel_shift);
 396             s->sao_pixel_buffer_v[c_idx] =
 397                 av_malloc((h * 2 * sps->ctb_width) <<
 398                           sps->pixel_shift);
 399         }
 400     }
 401
 402     s->ps.sps = sps;
 403     s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
 404
 405     return 0;
 406
 407 fail:
 408     pic_arrays_free(s);
 409     s->ps.sps = NULL;
 410     return ret;
 411 }
 412
 413 static int hls_slice_header(HEVCContext *s)
 414 {
 415     GetBitContext *gb = &s->HEVClc->gb;
 416     SliceHeader *sh   = &s->sh;
 417     int i, ret;
 418
 419     // Coded parameters
 420     sh->first_slice_in_pic_flag = get_bits1(gb);
 421     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 422         s->seq_decode = (s->seq_decode + 1) & 0xff;
 423         s->max_ra     = INT_MAX;
 424         if (IS_IDR(s))
 425             ff_hevc_clear_refs(s);
 426     }
 427     sh->no_output_of_prior_pics_flag = 0;
 428     if (IS_IRAP(s))
 429         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 430
 431     sh->pps_id = get_ue_golomb_long(gb);
 432     if (sh->pps_id >= MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
 433         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 434         return AVERROR_INVALIDDATA;
 435     }
 436     if (!sh->first_slice_in_pic_flag &&
 437         s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
 438         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 439         return AVERROR_INVALIDDATA;
 440     }
 441     s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
 442     if (s->nal_unit_type == NAL_CRA_NUT && s->last_eos == 1)
 443         sh->no_output_of_prior_pics_flag = 1;
 444
 445     if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
 446         const HEVCSPS* last_sps = s->ps.sps;
 447         s->ps.sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
 448         if (last_sps && IS_IRAP(s) && s->nal_unit_type != NAL_CRA_NUT) {
 449             if (s->ps.sps->width !=  last_sps->width || s->ps.sps->height != last_sps->height ||
 450                 s->ps.sps->temporal_layer[s->ps.sps->max_sub_layers - 1].max_dec_pic_buffering !=
 451                 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
 452                 sh->no_output_of_prior_pics_flag = 0;
 453         }
 454         ff_hevc_clear_refs(s);
 455         ret = set_sps(s, s->ps.sps, AV_PIX_FMT_NONE);
 456         if (ret < 0)
 457             return ret;
 458
 459         s->seq_decode = (s->seq_decode + 1) & 0xff;
 460         s->max_ra     = INT_MAX;
 461     }
 462
 463     sh->dependent_slice_segment_flag = 0;
 464     if (!sh->first_slice_in_pic_flag) {
 465         int slice_address_length;
 466
 467         if (s->ps.pps->dependent_slice_segments_enabled_flag)
 468             sh->dependent_slice_segment_flag = get_bits1(gb);
 469
 470         slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
 471                                             s->ps.sps->ctb_height);
 472         sh->slice_segment_addr = slice_address_length ? get_bits(gb, slice_address_length) : 0;
 473         if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
 474             av_log(s->avctx, AV_LOG_ERROR,
 475                    "Invalid slice segment address: %u.\n",
 476                    sh->slice_segment_addr);
 477             return AVERROR_INVALIDDATA;
 478         }
 479
 480         if (!sh->dependent_slice_segment_flag) {
 481             sh->slice_addr = sh->slice_segment_addr;
 482             s->slice_idx++;
 483         }
 484     } else {
 485         sh->slice_segment_addr = sh->slice_addr = 0;
 486         s->slice_idx           = 0;
 487         s->slice_initialized   = 0;
 488     }
 489
 490     if (!sh->dependent_slice_segment_flag) {
 491         s->slice_initialized = 0;
 492
 493         for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
 494             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 495
 496         sh->slice_type = get_ue_golomb_long(gb);
 497         if (!(sh->slice_type == I_SLICE ||
 498               sh->slice_type == P_SLICE ||
 499               sh->slice_type == B_SLICE)) {
 500             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 501                    sh->slice_type);
 502             return AVERROR_INVALIDDATA;
 503         }
 504         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
 505             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 506             return AVERROR_INVALIDDATA;
 507         }
 508
 509         // when flag is not present, picture is inferred to be output
 510         sh->pic_output_flag = 1;
 511         if (s->ps.pps->output_flag_present_flag)
 512             sh->pic_output_flag = get_bits1(gb);
 513
 514         if (s->ps.sps->separate_colour_plane_flag)
 515             sh->colour_plane_id = get_bits(gb, 2);
 516
 517         if (!IS_IDR(s)) {
 518             int poc, pos;
 519
 520             sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
 521             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
 522             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 523                 av_log(s->avctx, AV_LOG_WARNING,
 524                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 525                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 526                     return AVERROR_INVALIDDATA;
 527                 poc = s->poc;
 528             }
 529             s->poc = poc;
 530
 531             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
 532             pos = get_bits_left(gb);
 533             if (!sh->short_term_ref_pic_set_sps_flag) {
 534                 ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
 535                 if (ret < 0)
 536                     return ret;
 537
 538                 sh->short_term_rps = &sh->slice_rps;
 539             } else {
 540                 int numbits, rps_idx;
 541
 542                 if (!s->ps.sps->nb_st_rps) {
 543                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 544                     return AVERROR_INVALIDDATA;
 545                 }
 546
 547                 numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
 548                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 549                 sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
 550             }
 551             sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
 552
 553             pos = get_bits_left(gb);
 554             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 555             if (ret < 0) {
 556                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 557                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 558                     return AVERROR_INVALIDDATA;
 559             }
 560             sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
 561
 562             if (s->ps.sps->sps_temporal_mvp_enabled_flag)
 563                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 564             else
 565                 sh->slice_temporal_mvp_enabled_flag = 0;
 566         } else {
 567             s->sh.short_term_rps = NULL;
 568             s->poc               = 0;
 569         }
 570
 571         /* 8.3.1 */
 572         if (s->temporal_id == 0 &&
 573             s->nal_unit_type != NAL_TRAIL_N &&
 574             s->nal_unit_type != NAL_TSA_N   &&
 575             s->nal_unit_type != NAL_STSA_N  &&
 576             s->nal_unit_type != NAL_RADL_N  &&
 577             s->nal_unit_type != NAL_RADL_R  &&
 578             s->nal_unit_type != NAL_RASL_N  &&
 579             s->nal_unit_type != NAL_RASL_R)
 580             s->pocTid0 = s->poc;
 581
 582         if (s->ps.sps->sao_enabled) {
 583             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 584             if (s->ps.sps->chroma_format_idc) {
 585                 sh->slice_sample_adaptive_offset_flag[1] =
 586                 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 587             }
 588         } else {
 589             sh->slice_sample_adaptive_offset_flag[0] = 0;
 590             sh->slice_sample_adaptive_offset_flag[1] = 0;
 591             sh->slice_sample_adaptive_offset_flag[2] = 0;
 592         }
 593
 594         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 595         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
 596             int nb_refs;
 597
 598             sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
 599             if (sh->slice_type == B_SLICE)
 600                 sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
 601
 602             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 603                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 604                 if (sh->slice_type == B_SLICE)
 605                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 606             }
 607             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
 608                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 609                        sh->nb_refs[L0], sh->nb_refs[L1]);
 610                 return AVERROR_INVALIDDATA;
 611             }
 612
 613             sh->rpl_modification_flag[0] = 0;
 614             sh->rpl_modification_flag[1] = 0;
 615             nb_refs = ff_hevc_frame_nb_refs(s);
 616             if (!nb_refs) {
 617                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 618                 return AVERROR_INVALIDDATA;
 619             }
 620
 621             if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
 622                 sh->rpl_modification_flag[0] = get_bits1(gb);
 623                 if (sh->rpl_modification_flag[0]) {
 624                     for (i = 0; i < sh->nb_refs[L0]; i++)
 625                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 626                 }
 627
 628                 if (sh->slice_type == B_SLICE) {
 629                     sh->rpl_modification_flag[1] = get_bits1(gb);
 630                     if (sh->rpl_modification_flag[1] == 1)
 631                         for (i = 0; i < sh->nb_refs[L1]; i++)
 632                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 633                 }
 634             }
 635
 636             if (sh->slice_type == B_SLICE)
 637                 sh->mvd_l1_zero_flag = get_bits1(gb);
 638
 639             if (s->ps.pps->cabac_init_present_flag)
 640                 sh->cabac_init_flag = get_bits1(gb);
 641             else
 642                 sh->cabac_init_flag = 0;
 643
 644             sh->collocated_ref_idx = 0;
 645             if (sh->slice_temporal_mvp_enabled_flag) {
 646                 sh->collocated_list = L0;
 647                 if (sh->slice_type == B_SLICE)
 648                     sh->collocated_list = !get_bits1(gb);
 649
 650                 if (sh->nb_refs[sh->collocated_list] > 1) {
 651                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 652                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 653                         av_log(s->avctx, AV_LOG_ERROR,
 654                                "Invalid collocated_ref_idx: %d.\n",
 655                                sh->collocated_ref_idx);
 656                         return AVERROR_INVALIDDATA;
 657                     }
 658                 }
 659             }
 660
 661             if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
 662                 (s->ps.pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
 663                 pred_weight_table(s, gb);
 664             }
 665
 666             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 667             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 668                 av_log(s->avctx, AV_LOG_ERROR,
 669                        "Invalid number of merging MVP candidates: %d.\n",
 670                        sh->max_num_merge_cand);
 671                 return AVERROR_INVALIDDATA;
 672             }
 673         }
 674
 675         sh->slice_qp_delta = get_se_golomb(gb);
 676
 677         if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 678             sh->slice_cb_qp_offset = get_se_golomb(gb);
 679             sh->slice_cr_qp_offset = get_se_golomb(gb);
 680         } else {
 681             sh->slice_cb_qp_offset = 0;
 682             sh->slice_cr_qp_offset = 0;
 683         }
 684
 685         if (s->ps.pps->chroma_qp_offset_list_enabled_flag)
 686             sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
 687         else
 688             sh->cu_chroma_qp_offset_enabled_flag = 0;
 689
 690         if (s->ps.pps->deblocking_filter_control_present_flag) {
 691             int deblocking_filter_override_flag = 0;
 692
 693             if (s->ps.pps->deblocking_filter_override_enabled_flag)
 694                 deblocking_filter_override_flag = get_bits1(gb);
 695
 696             if (deblocking_filter_override_flag) {
 697                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 698                 if (!sh->disable_deblocking_filter_flag) {
 699                     sh->beta_offset = get_se_golomb(gb) * 2;
 700                     sh->tc_offset   = get_se_golomb(gb) * 2;
 701                 }
 702             } else {
 703                 sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
 704                 sh->beta_offset                    = s->ps.pps->beta_offset;
 705                 sh->tc_offset                      = s->ps.pps->tc_offset;
 706             }
 707         } else {
 708             sh->disable_deblocking_filter_flag = 0;
 709             sh->beta_offset                    = 0;
 710             sh->tc_offset                      = 0;
 711         }
 712
 713         if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
 714             (sh->slice_sample_adaptive_offset_flag[0] ||
 715              sh->slice_sample_adaptive_offset_flag[1] ||
 716              !sh->disable_deblocking_filter_flag)) {
 717             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 718         } else {
 719             sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
 720         }
 721     } else if (!s->slice_initialized) {
 722         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 723         return AVERROR_INVALIDDATA;
 724     }
 725
 726     sh->num_entry_point_offsets = 0;
 727     if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
 728         unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
 729         // It would be possible to bound this tighter but this here is simpler
 730         if (num_entry_point_offsets > get_bits_left(gb)) {
 731             av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
 732             return AVERROR_INVALIDDATA;
 733         }
 734
 735         sh->num_entry_point_offsets = num_entry_point_offsets;
 736         if (sh->num_entry_point_offsets > 0) {
 737             int offset_len = get_ue_golomb_long(gb) + 1;
 738
 739             if (offset_len < 1 || offset_len > 32) {
 740                 sh->num_entry_point_offsets = 0;
 741                 av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
 742                 return AVERROR_INVALIDDATA;
 743             }
 744
 745             av_freep(&sh->entry_point_offset);
 746             av_freep(&sh->offset);
 747             av_freep(&sh->size);
 748             sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(unsigned));
 749             sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 750             sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 751             if (!sh->entry_point_offset || !sh->offset || !sh->size) {
 752                 sh->num_entry_point_offsets = 0;
 753                 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
 754                 return AVERROR(ENOMEM);
 755             }
 756             for (i = 0; i < sh->num_entry_point_offsets; i++) {
 757                 unsigned val = get_bits_long(gb, offset_len);
 758                 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
 759             }
 760             if (s->threads_number > 1 && (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1)) {
 761                 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
 762                 s->threads_number = 1;
 763             } else
 764                 s->enable_parallel_tiles = 0;
 765         } else
 766             s->enable_parallel_tiles = 0;
 767     }
 768
 769     if (s->ps.pps->slice_header_extension_present_flag) {
 770         unsigned int length = get_ue_golomb_long(gb);
 771         if (length*8LL > get_bits_left(gb)) {
 772             av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
 773             return AVERROR_INVALIDDATA;
 774         }
 775         for (i = 0; i < length; i++)
 776             skip_bits(gb, 8);  // slice_header_extension_data_byte
 777     }
 778
 779     // Inferred parameters
 780     sh->slice_qp = 26U + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 781     if (sh->slice_qp > 51 ||
 782         sh->slice_qp < -s->ps.sps->qp_bd_offset) {
 783         av_log(s->avctx, AV_LOG_ERROR,
 784                "The slice_qp %d is outside the valid range "
 785                "[%d, 51].\n",
 786                sh->slice_qp,
 787                -s->ps.sps->qp_bd_offset);
 788         return AVERROR_INVALIDDATA;
 789     }
 790
 791     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 792
 793     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
 794         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
 795         return AVERROR_INVALIDDATA;
 796     }
 797
 798     if (get_bits_left(gb) < 0) {
 799         av_log(s->avctx, AV_LOG_ERROR,
 800                "Overread slice header by %d bits\n", -get_bits_left(gb));
 801         return AVERROR_INVALIDDATA;
 802     }
 803
 804     s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
 805
 806     if (!s->ps.pps->cu_qp_delta_enabled_flag)
 807         s->HEVClc->qp_y = s->sh.slice_qp;
 808
 809     s->slice_initialized = 1;
 810     s->HEVClc->tu.cu_qp_offset_cb = 0;
 811     s->HEVClc->tu.cu_qp_offset_cr = 0;
 812
 813     s->no_rasl_output_flag = IS_IDR(s) || IS_BLA(s) || (s->nal_unit_type == NAL_CRA_NUT && s->last_eos);
 814
 815     return 0;
 816 }
 817
 818 #define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
 819
 820 #define SET_SAO(elem, value)                            \
 821 do {                                                    \
 822     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 823         sao->elem = value;                              \
 824     else if (sao_merge_left_flag)                       \
 825         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 826     else if (sao_merge_up_flag)                         \
 827         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 828     else                                                \
 829         sao->elem = 0;                                  \
 830 } while (0)
 831
 832 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 833 {
 834     HEVCLocalContext *lc    = s->HEVClc;
 835     int sao_merge_left_flag = 0;
 836     int sao_merge_up_flag   = 0;
 837     SAOParams *sao          = &CTB(s->sao, rx, ry);
 838     int c_idx, i;
 839
 840     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 841         s->sh.slice_sample_adaptive_offset_flag[1]) {
 842         if (rx > 0) {
 843             if (lc->ctb_left_flag)
 844                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 845         }
 846         if (ry > 0 && !sao_merge_left_flag) {
 847             if (lc->ctb_up_flag)
 848                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 849         }
 850     }
 851
 852     for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
 853         int log2_sao_offset_scale = c_idx == 0 ? s->ps.pps->log2_sao_offset_scale_luma :
 854                                                  s->ps.pps->log2_sao_offset_scale_chroma;
 855
 856         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 857             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 858             continue;
 859         }
 860
 861         if (c_idx == 2) {
 862             sao->type_idx[2] = sao->type_idx[1];
 863             sao->eo_class[2] = sao->eo_class[1];
 864         } else {
 865             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 866         }
 867
 868         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 869             continue;
 870
 871         for (i = 0; i < 4; i++)
 872             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 873
 874         if (sao->type_idx[c_idx] == SAO_BAND) {
 875             for (i = 0; i < 4; i++) {
 876                 if (sao->offset_abs[c_idx][i]) {
 877                     SET_SAO(offset_sign[c_idx][i],
 878                             ff_hevc_sao_offset_sign_decode(s));
 879                 } else {
 880                     sao->offset_sign[c_idx][i] = 0;
 881                 }
 882             }
 883             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 884         } else if (c_idx != 2) {
 885             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 886         }
 887
 888         // Inferred parameters
 889         sao->offset_val[c_idx][0] = 0;
 890         for (i = 0; i < 4; i++) {
 891             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
 892             if (sao->type_idx[c_idx] == SAO_EDGE) {
 893                 if (i > 1)
 894                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 895             } else if (sao->offset_sign[c_idx][i]) {
 896                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 897             }
 898             sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale;
 899         }
 900     }
 901 }
 902
 903 #undef SET_SAO
 904 #undef CTB
 905
 906 static int hls_cross_component_pred(HEVCContext *s, int idx) {
 907     HEVCLocalContext *lc    = s->HEVClc;
 908     int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
 909
 910     if (log2_res_scale_abs_plus1 !=  0) {
 911         int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
 912         lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
 913                                (1 - 2 * res_scale_sign_flag);
 914     } else {
 915         lc->tu.res_scale_val = 0;
 916     }
 917
 918
 919     return 0;
 920 }
 921
 922 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
 923                               int xBase, int yBase, int cb_xBase, int cb_yBase,
 924                               int log2_cb_size, int log2_trafo_size,
 925                               int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
 926 {
 927     HEVCLocalContext *lc = s->HEVClc;
 928     const int log2_trafo_size_c = log2_trafo_size - s->ps.sps->hshift[1];
 929     int i;
 930
 931     if (lc->cu.pred_mode == MODE_INTRA) {
 932         int trafo_size = 1 << log2_trafo_size;
 933         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
 934
 935         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
 936     }
 937
 938     if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
 939         (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
 940         int scan_idx   = SCAN_DIAG;
 941         int scan_idx_c = SCAN_DIAG;
 942         int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
 943                          (s->ps.sps->chroma_format_idc == 2 &&
 944                          (cbf_cb[1] || cbf_cr[1]));
 945
 946         if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
 947             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
 948             if (lc->tu.cu_qp_delta != 0)
 949                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
 950                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
 951             lc->tu.is_cu_qp_delta_coded = 1;
 952
 953             if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
 954                 lc->tu.cu_qp_delta >  (25 + s->ps.sps->qp_bd_offset / 2)) {
 955                 av_log(s->avctx, AV_LOG_ERROR,
 956                        "The cu_qp_delta %d is outside the valid range "
 957                        "[%d, %d].\n",
 958                        lc->tu.cu_qp_delta,
 959                        -(26 + s->ps.sps->qp_bd_offset / 2),
 960                         (25 + s->ps.sps->qp_bd_offset / 2));
 961                 return AVERROR_INVALIDDATA;
 962             }
 963
 964             ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
 965         }
 966
 967         if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
 968             !lc->cu.cu_transquant_bypass_flag  &&  !lc->tu.is_cu_chroma_qp_offset_coded) {
 969             int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
 970             if (cu_chroma_qp_offset_flag) {
 971                 int cu_chroma_qp_offset_idx  = 0;
 972                 if (s->ps.pps->chroma_qp_offset_list_len_minus1 > 0) {
 973                     cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
 974                     av_log(s->avctx, AV_LOG_ERROR,
 975                         "cu_chroma_qp_offset_idx not yet tested.\n");
 976                 }
 977                 lc->tu.cu_qp_offset_cb = s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
 978                 lc->tu.cu_qp_offset_cr = s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
 979             } else {
 980                 lc->tu.cu_qp_offset_cb = 0;
 981                 lc->tu.cu_qp_offset_cr = 0;
 982             }
 983             lc->tu.is_cu_chroma_qp_offset_coded = 1;
 984         }
 985
 986         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
 987             if (lc->tu.intra_pred_mode >= 6 &&
 988                 lc->tu.intra_pred_mode <= 14) {
 989                 scan_idx = SCAN_VERT;
 990             } else if (lc->tu.intra_pred_mode >= 22 &&
 991                        lc->tu.intra_pred_mode <= 30) {
 992                 scan_idx = SCAN_HORIZ;
 993             }
 994
 995             if (lc->tu.intra_pred_mode_c >=  6 &&
 996                 lc->tu.intra_pred_mode_c <= 14) {
 997                 scan_idx_c = SCAN_VERT;
 998             } else if (lc->tu.intra_pred_mode_c >= 22 &&
 999                        lc->tu.intra_pred_mode_c <= 30) {
1000                 scan_idx_c = SCAN_HORIZ;
1001             }
1002         }
1003
1004         lc->tu.cross_pf = 0;
1005
1006         if (cbf_luma)
1007             ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1008         if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1009             int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1010             int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1011             lc->tu.cross_pf  = (s->ps.pps->cross_component_prediction_enabled_flag && cbf_luma &&
1012                                 (lc->cu.pred_mode == MODE_INTER ||
1013                                  (lc->tu.chroma_mode_c ==  4)));
1014
1015             if (lc->tu.cross_pf) {
1016                 hls_cross_component_pred(s, 0);
1017             }
1018             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1019                 if (lc->cu.pred_mode == MODE_INTRA) {
1020                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1021                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
1022                 }
1023                 if (cbf_cb[i])
1024                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1025                                                 log2_trafo_size_c, scan_idx_c, 1);
1026                 else
1027                     if (lc->tu.cross_pf) {
1028                         ptrdiff_t stride = s->frame->linesize[1];
1029                         int hshift = s->ps.sps->hshift[1];
1030                         int vshift = s->ps.sps->vshift[1];
1031                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1032                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1033                         int size = 1 << log2_trafo_size_c;
1034
1035                         uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1036                                                               ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1037                         for (i = 0; i < (size * size); i++) {
1038                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1039                         }
1040                         s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1041                     }
1042             }
1043
1044             if (lc->tu.cross_pf) {
1045                 hls_cross_component_pred(s, 1);
1046             }
1047             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1048                 if (lc->cu.pred_mode == MODE_INTRA) {
1049                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1050                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1051                 }
1052                 if (cbf_cr[i])
1053                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1054                                                 log2_trafo_size_c, scan_idx_c, 2);
1055                 else
1056                     if (lc->tu.cross_pf) {
1057                         ptrdiff_t stride = s->frame->linesize[2];
1058                         int hshift = s->ps.sps->hshift[2];
1059                         int vshift = s->ps.sps->vshift[2];
1060                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1061                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1062                         int size = 1 << log2_trafo_size_c;
1063
1064                         uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1065                                                           ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1066                         for (i = 0; i < (size * size); i++) {
1067                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1068                         }
1069                         s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1070                     }
1071             }
1072         } else if (s->ps.sps->chroma_format_idc && blk_idx == 3) {
1073             int trafo_size_h = 1 << (log2_trafo_size + 1);
1074             int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1075             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1076                 if (lc->cu.pred_mode == MODE_INTRA) {
1077                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1078                                                     trafo_size_h, trafo_size_v);
1079                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1080                 }
1081                 if (cbf_cb[i])
1082                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1083                                                 log2_trafo_size, scan_idx_c, 1);
1084             }
1085             for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1086                 if (lc->cu.pred_mode == MODE_INTRA) {
1087                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1088                                                 trafo_size_h, trafo_size_v);
1089                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1090                 }
1091                 if (cbf_cr[i])
1092                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1093                                                 log2_trafo_size, scan_idx_c, 2);
1094             }
1095         }
1096     } else if (s->ps.sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
1097         if (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3) {
1098             int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1099             int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1100             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1101             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1102             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1103             if (s->ps.sps->chroma_format_idc == 2) {
1104                 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1105                                                 trafo_size_h, trafo_size_v);
1106                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1107                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1108             }
1109         } else if (blk_idx == 3) {
1110             int trafo_size_h = 1 << (log2_trafo_size + 1);
1111             int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1112             ff_hevc_set_neighbour_available(s, xBase, yBase,
1113                                             trafo_size_h, trafo_size_v);
1114             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1115             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1116             if (s->ps.sps->chroma_format_idc == 2) {
1117                 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1118                                                 trafo_size_h, trafo_size_v);
1119                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1120                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
1121             }
1122         }
1123     }
1124
1125     return 0;
1126 }
1127
1128 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1129 {
1130     int cb_size          = 1 << log2_cb_size;
1131     int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1132
1133     int min_pu_width     = s->ps.sps->min_pu_width;
1134     int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1135     int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1136     int i, j;
1137
1138     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1139         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1140             s->is_pcm[i + j * min_pu_width] = 2;
1141 }
1142
1143 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1144                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1145                               int log2_cb_size, int log2_trafo_size,
1146                               int trafo_depth, int blk_idx,
1147                               const int *base_cbf_cb, const int *base_cbf_cr)
1148 {
1149     HEVCLocalContext *lc = s->HEVClc;
1150     uint8_t split_transform_flag;
1151     int cbf_cb[2];
1152     int cbf_cr[2];
1153     int ret;
1154
1155     cbf_cb[0] = base_cbf_cb[0];
1156     cbf_cb[1] = base_cbf_cb[1];
1157     cbf_cr[0] = base_cbf_cr[0];
1158     cbf_cr[1] = base_cbf_cr[1];
1159
1160     if (lc->cu.intra_split_flag) {
1161         if (trafo_depth == 1) {
1162             lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[blk_idx];
1163             if (s->ps.sps->chroma_format_idc == 3) {
1164                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1165                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[blk_idx];
1166             } else {
1167                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1168                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1169             }
1170         }
1171     } else {
1172         lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[0];
1173         lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1174         lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1175     }
1176
1177     if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1178         log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
1179         trafo_depth     < lc->cu.max_trafo_depth       &&
1180         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1181         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1182     } else {
1183         int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1184                           lc->cu.pred_mode == MODE_INTER &&
1185                           lc->cu.part_mode != PART_2Nx2N &&
1186                           trafo_depth == 0;
1187
1188         split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1189                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1190                                inter_split;
1191     }
1192
1193     if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1194         if (trafo_depth == 0 || cbf_cb[0]) {
1195             cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1196             if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1197                 cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1198             }
1199         }
1200
1201         if (trafo_depth == 0 || cbf_cr[0]) {
1202             cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1203             if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1204                 cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1205             }
1206         }
1207     }
1208
1209     if (split_transform_flag) {
1210         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1211         const int x1 = x0 + trafo_size_split;
1212         const int y1 = y0 + trafo_size_split;
1213
1214 #define SUBDIVIDE(x, y, idx)                                                    \
1215 do {                                                                            \
1216     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1217                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1218                              cbf_cb, cbf_cr);                                   \
1219     if (ret < 0)                                                                \
1220         return ret;                                                             \
1221 } while (0)
1222
1223         SUBDIVIDE(x0, y0, 0);
1224         SUBDIVIDE(x1, y0, 1);
1225         SUBDIVIDE(x0, y1, 2);
1226         SUBDIVIDE(x1, y1, 3);
1227
1228 #undef SUBDIVIDE
1229     } else {
1230         int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
1231         int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1232         int min_tu_width     = s->ps.sps->min_tb_width;
1233         int cbf_luma         = 1;
1234
1235         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1236             cbf_cb[0] || cbf_cr[0] ||
1237             (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1238             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1239         }
1240
1241         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1242                                  log2_cb_size, log2_trafo_size,
1243                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1244         if (ret < 0)
1245             return ret;
1246         // TODO: store cbf_luma somewhere else
1247         if (cbf_luma) {
1248             int i, j;
1249             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1250                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1251                     int x_tu = (x0 + j) >> log2_min_tu_size;
1252                     int y_tu = (y0 + i) >> log2_min_tu_size;
1253                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1254                 }
1255         }
1256         if (!s->sh.disable_deblocking_filter_flag) {
1257             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1258             if (s->ps.pps->transquant_bypass_enable_flag &&
1259                 lc->cu.cu_transquant_bypass_flag)
1260                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1261         }
1262     }
1263     return 0;
1264 }
1265
1266 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1267 {
1268     HEVCLocalContext *lc = s->HEVClc;
1269     GetBitContext gb;
1270     int cb_size   = 1 << log2_cb_size;
1271     int stride0   = s->frame->linesize[0];
1272     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1273     int   stride1 = s->frame->linesize[1];
1274     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1275     int   stride2 = s->frame->linesize[2];
1276     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
1277
1278     int length         = cb_size * cb_size * s->ps.sps->pcm.bit_depth +
1279                          (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) +
1280                           ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) *
1281                           s->ps.sps->pcm.bit_depth_chroma;
1282     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1283     int ret;
1284
1285     if (!s->sh.disable_deblocking_filter_flag)
1286         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1287
1288     ret = init_get_bits(&gb, pcm, length);
1289     if (ret < 0)
1290         return ret;
1291
1292     s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size,     &gb, s->ps.sps->pcm.bit_depth);
1293     if (s->ps.sps->chroma_format_idc) {
1294         s->hevcdsp.put_pcm(dst1, stride1,
1295                            cb_size >> s->ps.sps->hshift[1],
1296                            cb_size >> s->ps.sps->vshift[1],
1297                            &gb, s->ps.sps->pcm.bit_depth_chroma);
1298         s->hevcdsp.put_pcm(dst2, stride2,
1299                            cb_size >> s->ps.sps->hshift[2],
1300                            cb_size >> s->ps.sps->vshift[2],
1301                            &gb, s->ps.sps->pcm.bit_depth_chroma);
1302     }
1303
1304     return 0;
1305 }
1306
1307 /**
1308  * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1309  *
1310  * @param s HEVC decoding context
1311  * @param dst target buffer for block data at block position
1312  * @param dststride stride of the dst buffer
1313  * @param ref reference picture buffer at origin (0, 0)
1314  * @param mv motion vector (relative to block position) to get pixel data from
1315  * @param x_off horizontal position of block from origin (0, 0)
1316  * @param y_off vertical position of block from origin (0, 0)
1317  * @param block_w width of block
1318  * @param block_h height of block
1319  * @param luma_weight weighting factor applied to the luma prediction
1320  * @param luma_offset additive offset applied to the luma prediction value
1321  */
1322
1323 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1324                         AVFrame *ref, const Mv *mv, int x_off, int y_off,
1325                         int block_w, int block_h, int luma_weight, int luma_offset)
1326 {
1327     HEVCLocalContext *lc = s->HEVClc;
1328     uint8_t *src         = ref->data[0];
1329     ptrdiff_t srcstride  = ref->linesize[0];
1330     int pic_width        = s->ps.sps->width;
1331     int pic_height       = s->ps.sps->height;
1332     int mx               = mv->x & 3;
1333     int my               = mv->y & 3;
1334     int weight_flag      = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1335                            (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1336     int idx              = ff_hevc_pel_weight[block_w];
1337
1338     x_off += mv->x >> 2;
1339     y_off += mv->y >> 2;
1340     src   += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1341
1342     if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1343         x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1344         y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1345         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1346         int offset     = QPEL_EXTRA_BEFORE * srcstride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1347         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1348
1349         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1350                                  edge_emu_stride, srcstride,
1351                                  block_w + QPEL_EXTRA,
1352                                  block_h + QPEL_EXTRA,
1353                                  x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1354                                  pic_width, pic_height);
1355         src = lc->edge_emu_buffer + buf_offset;
1356         srcstride = edge_emu_stride;
1357     }
1358
1359     if (!weight_flag)
1360         s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1361                                                       block_h, mx, my, block_w);
1362     else
1363         s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1364                                                         block_h, s->sh.luma_log2_weight_denom,
1365                                                         luma_weight, luma_offset, mx, my, block_w);
1366 }
1367
1368 /**
1369  * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1370  *
1371  * @param s HEVC decoding context
1372  * @param dst target buffer for block data at block position
1373  * @param dststride stride of the dst buffer
1374  * @param ref0 reference picture0 buffer at origin (0, 0)
1375  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1376  * @param x_off horizontal position of block from origin (0, 0)
1377  * @param y_off vertical position of block from origin (0, 0)
1378  * @param block_w width of block
1379  * @param block_h height of block
1380  * @param ref1 reference picture1 buffer at origin (0, 0)
1381  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1382  * @param current_mv current motion vector structure
1383  */
1384  static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1385                        AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1386                        int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1387 {
1388     HEVCLocalContext *lc = s->HEVClc;
1389     ptrdiff_t src0stride  = ref0->linesize[0];
1390     ptrdiff_t src1stride  = ref1->linesize[0];
1391     int pic_width        = s->ps.sps->width;
1392     int pic_height       = s->ps.sps->height;
1393     int mx0              = mv0->x & 3;
1394     int my0              = mv0->y & 3;
1395     int mx1              = mv1->x & 3;
1396     int my1              = mv1->y & 3;
1397     int weight_flag      = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1398                            (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1399     int x_off0           = x_off + (mv0->x >> 2);
1400     int y_off0           = y_off + (mv0->y >> 2);
1401     int x_off1           = x_off + (mv1->x >> 2);
1402     int y_off1           = y_off + (mv1->y >> 2);
1403     int idx              = ff_hevc_pel_weight[block_w];
1404
1405     uint8_t *src0  = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1406     uint8_t *src1  = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1407
1408     if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1409         x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1410         y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1411         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1412         int offset     = QPEL_EXTRA_BEFORE * src0stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1413         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1414
1415         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1416                                  edge_emu_stride, src0stride,
1417                                  block_w + QPEL_EXTRA,
1418                                  block_h + QPEL_EXTRA,
1419                                  x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1420                                  pic_width, pic_height);
1421         src0 = lc->edge_emu_buffer + buf_offset;
1422         src0stride = edge_emu_stride;
1423     }
1424
1425     if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1426         x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1427         y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1428         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1429         int offset     = QPEL_EXTRA_BEFORE * src1stride       + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1430         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1431
1432         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1433                                  edge_emu_stride, src1stride,
1434                                  block_w + QPEL_EXTRA,
1435                                  block_h + QPEL_EXTRA,
1436                                  x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1437                                  pic_width, pic_height);
1438         src1 = lc->edge_emu_buffer2 + buf_offset;
1439         src1stride = edge_emu_stride;
1440     }
1441
1442     s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
1443                                                 block_h, mx0, my0, block_w);
1444     if (!weight_flag)
1445         s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1446                                                        block_h, mx1, my1, block_w);
1447     else
1448         s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1449                                                          block_h, s->sh.luma_log2_weight_denom,
1450                                                          s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1451                                                          s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1452                                                          s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1453                                                          s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1454                                                          mx1, my1, block_w);
1455
1456 }
1457
1458 /**
1459  * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1460  *
1461  * @param s HEVC decoding context
1462  * @param dst1 target buffer for block data at block position (U plane)
1463  * @param dst2 target buffer for block data at block position (V plane)
1464  * @param dststride stride of the dst1 and dst2 buffers
1465  * @param ref reference picture buffer at origin (0, 0)
1466  * @param mv motion vector (relative to block position) to get pixel data from
1467  * @param x_off horizontal position of block from origin (0, 0)
1468  * @param y_off vertical position of block from origin (0, 0)
1469  * @param block_w width of block
1470  * @param block_h height of block
1471  * @param chroma_weight weighting factor applied to the chroma prediction
1472  * @param chroma_offset additive offset applied to the chroma prediction value
1473  */
1474
1475 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1476                           ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1477                           int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1478 {
1479     HEVCLocalContext *lc = s->HEVClc;
1480     int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
1481     int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
1482     const Mv *mv         = &current_mv->mv[reflist];
1483     int weight_flag      = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1484                            (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1485     int idx              = ff_hevc_pel_weight[block_w];
1486     int hshift           = s->ps.sps->hshift[1];
1487     int vshift           = s->ps.sps->vshift[1];
1488     intptr_t mx          = av_mod_uintp2(mv->x, 2 + hshift);
1489     intptr_t my          = av_mod_uintp2(mv->y, 2 + vshift);
1490     intptr_t _mx         = mx << (1 - hshift);
1491     intptr_t _my         = my << (1 - vshift);
1492
1493     x_off += mv->x >> (2 + hshift);
1494     y_off += mv->y >> (2 + vshift);
1495     src0  += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1496
1497     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1498         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1499         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1500         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1501         int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->ps.sps->pixel_shift));
1502         int buf_offset0 = EPEL_EXTRA_BEFORE *
1503                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1504         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1505                                  edge_emu_stride, srcstride,
1506                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1507                                  x_off - EPEL_EXTRA_BEFORE,
1508                                  y_off - EPEL_EXTRA_BEFORE,
1509                                  pic_width, pic_height);
1510
1511         src0 = lc->edge_emu_buffer + buf_offset0;
1512         srcstride = edge_emu_stride;
1513     }
1514     if (!weight_flag)
1515         s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1516                                                   block_h, _mx, _my, block_w);
1517     else
1518         s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1519                                                         block_h, s->sh.chroma_log2_weight_denom,
1520                                                         chroma_weight, chroma_offset, _mx, _my, block_w);
1521 }
1522
1523 /**
1524  * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1525  *
1526  * @param s HEVC decoding context
1527  * @param dst target buffer for block data at block position
1528  * @param dststride stride of the dst buffer
1529  * @param ref0 reference picture0 buffer at origin (0, 0)
1530  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1531  * @param x_off horizontal position of block from origin (0, 0)
1532  * @param y_off vertical position of block from origin (0, 0)
1533  * @param block_w width of block
1534  * @param block_h height of block
1535  * @param ref1 reference picture1 buffer at origin (0, 0)
1536  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1537  * @param current_mv current motion vector structure
1538  * @param cidx chroma component(cb, cr)
1539  */
1540 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1541                          int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1542 {
1543     HEVCLocalContext *lc = s->HEVClc;
1544     uint8_t *src1        = ref0->data[cidx+1];
1545     uint8_t *src2        = ref1->data[cidx+1];
1546     ptrdiff_t src1stride = ref0->linesize[cidx+1];
1547     ptrdiff_t src2stride = ref1->linesize[cidx+1];
1548     int weight_flag      = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1549                            (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1550     int pic_width        = s->ps.sps->width >> s->ps.sps->hshift[1];
1551     int pic_height       = s->ps.sps->height >> s->ps.sps->vshift[1];
1552     Mv *mv0              = &current_mv->mv[0];
1553     Mv *mv1              = &current_mv->mv[1];
1554     int hshift = s->ps.sps->hshift[1];
1555     int vshift = s->ps.sps->vshift[1];
1556
1557     intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift);
1558     intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift);
1559     intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift);
1560     intptr_t my1 = av_mod_uintp2(mv1->y, 2 + vshift);
1561     intptr_t _mx0 = mx0 << (1 - hshift);
1562     intptr_t _my0 = my0 << (1 - vshift);
1563     intptr_t _mx1 = mx1 << (1 - hshift);
1564     intptr_t _my1 = my1 << (1 - vshift);
1565
1566     int x_off0 = x_off + (mv0->x >> (2 + hshift));
1567     int y_off0 = y_off + (mv0->y >> (2 + vshift));
1568     int x_off1 = x_off + (mv1->x >> (2 + hshift));
1569     int y_off1 = y_off + (mv1->y >> (2 + vshift));
1570     int idx = ff_hevc_pel_weight[block_w];
1571     src1  += y_off0 * src1stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1572     src2  += y_off1 * src2stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
1573
1574     if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1575         x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1576         y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1577         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1578         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1579         int buf_offset1 = EPEL_EXTRA_BEFORE *
1580                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1581
1582         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1583                                  edge_emu_stride, src1stride,
1584                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1585                                  x_off0 - EPEL_EXTRA_BEFORE,
1586                                  y_off0 - EPEL_EXTRA_BEFORE,
1587                                  pic_width, pic_height);
1588
1589         src1 = lc->edge_emu_buffer + buf_offset1;
1590         src1stride = edge_emu_stride;
1591     }
1592
1593     if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1594         x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1595         y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1596         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1597         int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1598         int buf_offset1 = EPEL_EXTRA_BEFORE *
1599                           (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1600
1601         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1602                                  edge_emu_stride, src2stride,
1603                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1604                                  x_off1 - EPEL_EXTRA_BEFORE,
1605                                  y_off1 - EPEL_EXTRA_BEFORE,
1606                                  pic_width, pic_height);
1607
1608         src2 = lc->edge_emu_buffer2 + buf_offset1;
1609         src2stride = edge_emu_stride;
1610     }
1611
1612     s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1613                                                 block_h, _mx0, _my0, block_w);
1614     if (!weight_flag)
1615         s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1616                                                        src2, src2stride, lc->tmp,
1617                                                        block_h, _mx1, _my1, block_w);
1618     else
1619         s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1620                                                          src2, src2stride, lc->tmp,
1621                                                          block_h,
1622                                                          s->sh.chroma_log2_weight_denom,
1623                                                          s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1624                                                          s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1625                                                          s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1626                                                          s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1627                                                          _mx1, _my1, block_w);
1628 }
1629
1630 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1631                                 const Mv *mv, int y0, int height)
1632 {
1633     int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1634
1635     if (s->threads_type == FF_THREAD_FRAME )
1636         ff_thread_await_progress(&ref->tf, y, 0);
1637 }
1638
1639 static void hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
1640                                   int nPbH, int log2_cb_size, int part_idx,
1641                                   int merge_idx, MvField *mv)
1642 {
1643     HEVCLocalContext *lc = s->HEVClc;
1644     enum InterPredIdc inter_pred_idc = PRED_L0;
1645     int mvp_flag;
1646
1647     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1648     mv->pred_flag = 0;
1649     if (s->sh.slice_type == B_SLICE)
1650         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1651
1652     if (inter_pred_idc != PRED_L1) {
1653         if (s->sh.nb_refs[L0])
1654             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1655
1656         mv->pred_flag = PF_L0;
1657         ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1658         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1659         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1660                                  part_idx, merge_idx, mv, mvp_flag, 0);
1661         mv->mv[0].x += lc->pu.mvd.x;
1662         mv->mv[0].y += lc->pu.mvd.y;
1663     }
1664
1665     if (inter_pred_idc != PRED_L0) {
1666         if (s->sh.nb_refs[L1])
1667             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1668
1669         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1670             AV_ZERO32(&lc->pu.mvd);
1671         } else {
1672             ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1673         }
1674
1675         mv->pred_flag += PF_L1;
1676         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1677         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1678                                  part_idx, merge_idx, mv, mvp_flag, 1);
1679         mv->mv[1].x += lc->pu.mvd.x;
1680         mv->mv[1].y += lc->pu.mvd.y;
1681     }
1682 }
1683
1684 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1685                                 int nPbW, int nPbH,
1686                                 int log2_cb_size, int partIdx, int idx)
1687 {
1688 #define POS(c_idx, x, y)                                                              \
1689     &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1690                            (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1691     HEVCLocalContext *lc = s->HEVClc;
1692     int merge_idx = 0;
1693     struct MvField current_mv = {{{ 0 }}};
1694
1695     int min_pu_width = s->ps.sps->min_pu_width;
1696
1697     MvField *tab_mvf = s->ref->tab_mvf;
1698     RefPicList  *refPicList = s->ref->refPicList;
1699     HEVCFrame *ref0 = NULL, *ref1 = NULL;
1700     uint8_t *dst0 = POS(0, x0, y0);
1701     uint8_t *dst1 = POS(1, x0, y0);
1702     uint8_t *dst2 = POS(2, x0, y0);
1703     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1704     int min_cb_width     = s->ps.sps->min_cb_width;
1705     int x_cb             = x0 >> log2_min_cb_size;
1706     int y_cb             = y0 >> log2_min_cb_size;
1707     int x_pu, y_pu;
1708     int i, j;
1709
1710     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1711
1712     if (!skip_flag)
1713         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1714
1715     if (skip_flag || lc->pu.merge_flag) {
1716         if (s->sh.max_num_merge_cand > 1)
1717             merge_idx = ff_hevc_merge_idx_decode(s);
1718         else
1719             merge_idx = 0;
1720
1721         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1722                                    partIdx, merge_idx, &current_mv);
1723     } else {
1724         hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1725                               partIdx, merge_idx, &current_mv);
1726     }
1727
1728     x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1729     y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1730
1731     for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1732         for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1733             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1734
1735     if (current_mv.pred_flag & PF_L0) {
1736         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1737         if (!ref0)
1738             return;
1739         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1740     }
1741     if (current_mv.pred_flag & PF_L1) {
1742         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1743         if (!ref1)
1744             return;
1745         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1746     }
1747
1748     if (current_mv.pred_flag == PF_L0) {
1749         int x0_c = x0 >> s->ps.sps->hshift[1];
1750         int y0_c = y0 >> s->ps.sps->vshift[1];
1751         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1752         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1753
1754         luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1755                     &current_mv.mv[0], x0, y0, nPbW, nPbH,
1756                     s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1757                     s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1758
1759         if (s->ps.sps->chroma_format_idc) {
1760             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1761                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1762                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1763             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1764                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1765                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1766         }
1767     } else if (current_mv.pred_flag == PF_L1) {
1768         int x0_c = x0 >> s->ps.sps->hshift[1];
1769         int y0_c = y0 >> s->ps.sps->vshift[1];
1770         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1771         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1772
1773         luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1774                     &current_mv.mv[1], x0, y0, nPbW, nPbH,
1775                     s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1776                     s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1777
1778         if (s->ps.sps->chroma_format_idc) {
1779             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1780                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1781                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1782
1783             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1784                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1785                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1786         }
1787     } else if (current_mv.pred_flag == PF_BI) {
1788         int x0_c = x0 >> s->ps.sps->hshift[1];
1789         int y0_c = y0 >> s->ps.sps->vshift[1];
1790         int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1791         int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1792
1793         luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1794                    &current_mv.mv[0], x0, y0, nPbW, nPbH,
1795                    ref1->frame, &current_mv.mv[1], &current_mv);
1796
1797         if (s->ps.sps->chroma_format_idc) {
1798             chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1799                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0);
1800
1801             chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1802                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 1);
1803         }
1804     }
1805 }
1806
1807 /**
1808  * 8.4.1
1809  */
1810 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1811                                 int prev_intra_luma_pred_flag)
1812 {
1813     HEVCLocalContext *lc = s->HEVClc;
1814     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
1815     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
1816     int min_pu_width     = s->ps.sps->min_pu_width;
1817     int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
1818     int x0b              = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
1819     int y0b              = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
1820
1821     int cand_up   = (lc->ctb_up_flag || y0b) ?
1822                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1823     int cand_left = (lc->ctb_left_flag || x0b) ?
1824                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1825
1826     int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1827
1828     MvField *tab_mvf = s->ref->tab_mvf;
1829     int intra_pred_mode;
1830     int candidate[3];
1831     int i, j;
1832
1833     // intra_pred_mode prediction does not cross vertical CTB boundaries
1834     if ((y0 - 1) < y_ctb)
1835         cand_up = INTRA_DC;
1836
1837     if (cand_left == cand_up) {
1838         if (cand_left < 2) {
1839             candidate[0] = INTRA_PLANAR;
1840             candidate[1] = INTRA_DC;
1841             candidate[2] = INTRA_ANGULAR_26;
1842         } else {
1843             candidate[0] = cand_left;
1844             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1845             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1846         }
1847     } else {
1848         candidate[0] = cand_left;
1849         candidate[1] = cand_up;
1850         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1851             candidate[2] = INTRA_PLANAR;
1852         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1853             candidate[2] = INTRA_DC;
1854         } else {
1855             candidate[2] = INTRA_ANGULAR_26;
1856         }
1857     }
1858
1859     if (prev_intra_luma_pred_flag) {
1860         intra_pred_mode = candidate[lc->pu.mpm_idx];
1861     } else {
1862         if (candidate[0] > candidate[1])
1863             FFSWAP(uint8_t, candidate[0], candidate[1]);
1864         if (candidate[0] > candidate[2])
1865             FFSWAP(uint8_t, candidate[0], candidate[2]);
1866         if (candidate[1] > candidate[2])
1867             FFSWAP(uint8_t, candidate[1], candidate[2]);
1868
1869         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1870         for (i = 0; i < 3; i++)
1871             if (intra_pred_mode >= candidate[i])
1872                 intra_pred_mode++;
1873     }
1874
1875     /* write the intra prediction units into the mv array */
1876     if (!size_in_pus)
1877         size_in_pus = 1;
1878     for (i = 0; i < size_in_pus; i++) {
1879         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1880                intra_pred_mode, size_in_pus);
1881
1882         for (j = 0; j < size_in_pus; j++) {
1883             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
1884         }
1885     }
1886
1887     return intra_pred_mode;
1888 }
1889
1890 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1891                                           int log2_cb_size, int ct_depth)
1892 {
1893     int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
1894     int x_cb   = x0 >> s->ps.sps->log2_min_cb_size;
1895     int y_cb   = y0 >> s->ps.sps->log2_min_cb_size;
1896     int y;
1897
1898     for (y = 0; y < length; y++)
1899         memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
1900                ct_depth, length);
1901 }
1902
/*
 * Remapping of an intra prediction mode (index 0..34) applied to the chroma
 * mode when chroma_format_idc == 2, see intra_prediction_unit().
 */
static const uint8_t tab_mode_idx[] = {
    /*  0.. 6 */  0,  1,  2,  2,  2,  2,  3,
    /*  7..13 */  5,  7,  8, 10, 12, 13, 15,
    /* 14..20 */ 17, 18, 19, 20, 21, 22, 23,
    /* 21..27 */ 23, 24, 24, 25, 25, 26, 27,
    /* 28..34 */ 27, 28, 28, 29, 29, 30, 31,
};
1906
1907 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1908                                   int log2_cb_size)
1909 {
1910     HEVCLocalContext *lc = s->HEVClc;
1911     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1912     uint8_t prev_intra_luma_pred_flag[4];
1913     int split   = lc->cu.part_mode == PART_NxN;
1914     int pb_size = (1 << log2_cb_size) >> split;
1915     int side    = split + 1;
1916     int chroma_mode;
1917     int i, j;
1918
1919     for (i = 0; i < side; i++)
1920         for (j = 0; j < side; j++)
1921             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1922
1923     for (i = 0; i < side; i++) {
1924         for (j = 0; j < side; j++) {
1925             if (prev_intra_luma_pred_flag[2 * i + j])
1926                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1927             else
1928                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1929
1930             lc->pu.intra_pred_mode[2 * i + j] =
1931                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1932                                      prev_intra_luma_pred_flag[2 * i + j]);
1933         }
1934     }
1935
1936     if (s->ps.sps->chroma_format_idc == 3) {
1937         for (i = 0; i < side; i++) {
1938             for (j = 0; j < side; j++) {
1939                 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1940                 if (chroma_mode != 4) {
1941                     if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
1942                         lc->pu.intra_pred_mode_c[2 * i + j] = 34;
1943                     else
1944                         lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
1945                 } else {
1946                     lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
1947                 }
1948             }
1949         }
1950     } else if (s->ps.sps->chroma_format_idc == 2) {
1951         int mode_idx;
1952         lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1953         if (chroma_mode != 4) {
1954             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1955                 mode_idx = 34;
1956             else
1957                 mode_idx = intra_chroma_table[chroma_mode];
1958         } else {
1959             mode_idx = lc->pu.intra_pred_mode[0];
1960         }
1961         lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
1962     } else if (s->ps.sps->chroma_format_idc != 0) {
1963         chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1964         if (chroma_mode != 4) {
1965             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1966                 lc->pu.intra_pred_mode_c[0] = 34;
1967             else
1968                 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
1969         } else {
1970             lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
1971         }
1972     }
1973 }
1974
1975 static void intra_prediction_unit_default_value(HEVCContext *s,
1976                                                 int x0, int y0,
1977                                                 int log2_cb_size)
1978 {
1979     HEVCLocalContext *lc = s->HEVClc;
1980     int pb_size          = 1 << log2_cb_size;
1981     int size_in_pus      = pb_size >> s->ps.sps->log2_min_pu_size;
1982     int min_pu_width     = s->ps.sps->min_pu_width;
1983     MvField *tab_mvf     = s->ref->tab_mvf;
1984     int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
1985     int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
1986     int j, k;
1987
1988     if (size_in_pus == 0)
1989         size_in_pus = 1;
1990     for (j = 0; j < size_in_pus; j++)
1991         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
1992     if (lc->cu.pred_mode == MODE_INTRA)
1993         for (j = 0; j < size_in_pus; j++)
1994             for (k = 0; k < size_in_pus; k++)
1995                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
1996 }
1997
/**
 * Parse one coding unit located at luma position (x0, y0) with size
 * 1 << log2_cb_size.
 *
 * The per-CU state in the local context is reset to defaults, then the
 * transquant-bypass and skip flags are decoded. A skipped CU is handled as a
 * single full-size prediction unit; otherwise prediction mode, partition
 * mode, optional PCM samples, intra/inter prediction units and the transform
 * tree are parsed in that order. Finally the luma QP is written to
 * s->qp_y_tab, lc->qPy_pred is refreshed at the end of a QP block, and the
 * coding-tree depth is recorded.
 *
 * @param s            decoder context
 * @param x0           horizontal position of the CU, in luma samples
 * @param y0           vertical position of the CU, in luma samples
 * @param log2_cb_size log2 of the CU size
 * @return 0 on success, otherwise the negative value returned by
 *         hls_pcm_sample() or hls_transform_tree()
 */
1998 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
1999 {
2000     int cb_size          = 1 << log2_cb_size;
2001     HEVCLocalContext *lc = s->HEVClc;
2002     int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
         /* Side of the CU measured in minimum coding blocks. */
2003     int length           = cb_size >> log2_min_cb_size;
2004     int min_cb_width     = s->ps.sps->min_cb_width;
2005     int x_cb             = x0 >> log2_min_cb_size;
2006     int y_cb             = y0 >> log2_min_cb_size;
2007     int idx              = log2_cb_size - 2;
         /* Low-bit mask of a position inside a block of size
          * 1 << (log2_ctb_size - diff_cu_qp_delta_depth). */
2008     int qp_block_mask    = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2009     int x, y, ret;
2010
         /* Defaults; overwritten below once the matching syntax is decoded. */
2011     lc->cu.x                = x0;
2012     lc->cu.y                = y0;
2013     lc->cu.pred_mode        = MODE_INTRA;
2014     lc->cu.part_mode        = PART_2Nx2N;
2015     lc->cu.intra_split_flag = 0;
2016
2017     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2018     for (x = 0; x < 4; x++)
2019         lc->pu.intra_pred_mode[x] = 1;
2020     if (s->ps.pps->transquant_bypass_enable_flag) {
2021         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2022         if (lc->cu.cu_transquant_bypass_flag)
2023             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2024     } else
2025         lc->cu.cu_transquant_bypass_flag = 0;
2026
         /* The skip flag is only coded outside I slices; in both cases the
          * value is stored for every minimum coding block covered by the CU. */
2027     if (s->sh.slice_type != I_SLICE) {
2028         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2029
2030         x = y_cb * min_cb_width + x_cb;
2031         for (y = 0; y < length; y++) {
2032             memset(&s->skip_flag[x], skip_flag, length);
2033             x += min_cb_width;
2034         }
2035         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2036     } else {
2037         x = y_cb * min_cb_width + x_cb;
2038         for (y = 0; y < length; y++) {
2039             memset(&s->skip_flag[x], 0, length);
2040             x += min_cb_width;
2041         }
2042     }
2043
2044     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
             /* Skipped CU: one full-size prediction unit and no residual. */
2045         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2046         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2047
2048         if (!s->sh.disable_deblocking_filter_flag)
2049             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2050     } else {
2051         int pcm_flag = 0;
2052
2053         if (s->sh.slice_type != I_SLICE)
2054             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
             /* The partition mode is coded for every non-intra CU, and for
              * intra CUs only at the minimum coding block size. */
2055         if (lc->cu.pred_mode != MODE_INTRA ||
2056             log2_cb_size == s->ps.sps->log2_min_cb_size) {
2057             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2058             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2059                                       lc->cu.pred_mode == MODE_INTRA;
2060         }
2061
2062         if (lc->cu.pred_mode == MODE_INTRA) {
                 /* pcm_flag is read only for 2Nx2N CUs whose size lies within
                  * the PCM size range of the SPS. */
2063             if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2064                 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2065                 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2066                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2067             }
2068             if (pcm_flag) {
2069                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2070                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2071                 if (s->ps.sps->pcm.loop_filter_disable_flag)
2072                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2073
                     /* The PCM error is reported only after the deblocking
                      * bypass has been recorded. */
2074                 if (ret < 0)
2075                     return ret;
2076             } else {
2077                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2078             }
2079         } else {
2080             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
                 /* One hls_prediction_unit() call per partition. Each call
                  * passes the partition origin, two size arguments (presumably
                  * width then height -- confirm against hls_prediction_unit()),
                  * log2_cb_size, the partition index and an index derived from
                  * idx. */
2081             switch (lc->cu.part_mode) {
2082             case PART_2Nx2N:
2083                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2084                 break;
2085             case PART_2NxN:
2086                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0, idx);
2087                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2088                 break;
2089             case PART_Nx2N:
2090                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2091                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2092                 break;
2093             case PART_2NxnU:
2094                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0, idx);
2095                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2096                 break;
2097             case PART_2NxnD:
2098                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2099                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1, idx);
2100                 break;
2101             case PART_nLx2N:
2102                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0, idx - 2);
2103                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2104                 break;
2105             case PART_nRx2N:
2106                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2107                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1, idx - 2);
2108                 break;
2109             case PART_NxN:
2110                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2111                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2112                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2113                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
2114                 break;
2115             }
2116         }
2117
2118         if (!pcm_flag) {
                 /* A residual is assumed present unless the flag decoded
                  * below says otherwise. */
2119             int rqt_root_cbf = 1;
2120
                 /* The flag is only coded for non-intra CUs that are not a
                  * 2Nx2N partition with merge_flag set. */
2121             if (lc->cu.pred_mode != MODE_INTRA &&
2122                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2123                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2124             }
2125             if (rqt_root_cbf) {
                     /* All-zero array passed for both cbf arguments of the
                      * root transform tree call. */
2126                 const static int cbf[2] = { 0 };
2127                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2128                                          s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2129                                          s->ps.sps->max_transform_hierarchy_depth_inter;
2130                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2131                                          log2_cb_size,
2132                                          log2_cb_size, 0, 0, cbf, cbf);
2133                 if (ret < 0)
2134                     return ret;
2135             } else {
2136                 if (!s->sh.disable_deblocking_filter_flag)
2137                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2138             }
2139         }
2140     }
2141
         /* No QP delta was coded for this CU: set the QP through
          * ff_hevc_set_qPy() instead. */
2142     if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2143         ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
2144
         /* Store the luma QP for every minimum coding block of the CU. */
2145     x = y_cb * min_cb_width + x_cb;
2146     for (y = 0; y < length; y++) {
2147         memset(&s->qp_y_tab[x], lc->qp_y, length);
2148         x += min_cb_width;
2149     }
2150
         /* The CU's bottom-right corner is aligned to the QP block grid:
          * remember its QP in qPy_pred. */
2151     if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2152        ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2153         lc->qPy_pred = lc->qp_y;
2154     }
2155
2156     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
2157
2158     return 0;
2159 }
2160
/**
 * Recursively parse the coding quadtree rooted at (x0, y0).
 *
 * A block is split when the decoded split flag says so, or implicitly when it
 * crosses the picture boundary while still being larger than the minimum
 * coding block. Leaves are parsed with hls_coding_unit().
 *
 * @param s            decoder context
 * @param x0           horizontal position of the block, in luma samples
 * @param y0           vertical position of the block, in luma samples
 * @param log2_cb_size log2 of the block size
 * @param cb_depth     depth of this node in the quadtree (0 at the CTB)
 * @return a negative value on error; otherwise non-zero if more slice data
 *         follows and 0 if it does not
 */
2161 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2162                                int log2_cb_size, int cb_depth)
2163 {
2164     HEVCLocalContext *lc = s->HEVClc;
2165     const int cb_size    = 1 << log2_cb_size;
2166     int ret;
2167     int split_cu;
2168
2169     lc->ct_depth = cb_depth;
         /* The split flag is only coded when the block lies fully inside the
          * picture and is larger than the minimum coding block; otherwise the
          * split is inferred from the size alone. */
2170     if (x0 + cb_size <= s->ps.sps->width  &&
2171         y0 + cb_size <= s->ps.sps->height &&
2172         log2_cb_size > s->ps.sps->log2_min_cb_size) {
2173         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2174     } else {
2175         split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
2176     }
         /* Blocks at least as large as the QP delta block size reset the
          * QP delta state. */
2177     if (s->ps.pps->cu_qp_delta_enabled_flag &&
2178         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2179         lc->tu.is_cu_qp_delta_coded = 0;
2180         lc->tu.cu_qp_delta          = 0;
2181     }
2182
         /* Same reset for the chroma QP offset state. */
2183     if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2184         log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_chroma_qp_offset_depth) {
2185         lc->tu.is_cu_chroma_qp_offset_coded = 0;
2186     }
2187
2188     if (split_cu) {
2189         int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2190         const int cb_size_split = cb_size >> 1;
2191         const int x1 = x0 + cb_size_split;
2192         const int y1 = y0 + cb_size_split;
2193
2194         int more_data = 0;
2195
             /* Visit the four quadrants in order: top-left, top-right,
              * bottom-left, bottom-right. A quadrant is skipped when it starts
              * outside the picture or when the previous one reported no more
              * data. */
2196         more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2197         if (more_data < 0)
2198             return more_data;
2199
2200         if (more_data && x1 < s->ps.sps->width) {
2201             more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2202             if (more_data < 0)
2203                 return more_data;
2204         }
2205         if (more_data && y1 < s->ps.sps->height) {
2206             more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2207             if (more_data < 0)
2208                 return more_data;
2209         }
2210         if (more_data && x1 < s->ps.sps->width &&
2211             y1 < s->ps.sps->height) {
2212             more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
2213             if (more_data < 0)
2214                 return more_data;
2215         }
2216
             /* Bottom-right corner aligned to the QP block grid: remember the
              * current QP in qPy_pred. */
2217         if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2218             ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2219             lc->qPy_pred = lc->qp_y;
2220
             /* With data remaining, report "more" only if this block's right
              * or bottom edge lies strictly inside the picture. */
2221         if (more_data)
2222             return ((x1 + cb_size_split) < s->ps.sps->width ||
2223                     (y1 + cb_size_split) < s->ps.sps->height);
2224         else
2225             return 0;
2226     } else {
2227         ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2228         if (ret < 0)
2229             return ret;
             /* end_of_slice_flag is read only when this CU ends, in both
              * directions, on a CTB boundary or at/after the picture edge. */
2230         if ((!((x0 + cb_size) %
2231                (1 << (s->ps.sps->log2_ctb_size))) ||
2232              (x0 + cb_size >= s->ps.sps->width)) &&
2233             (!((y0 + cb_size) %
2234                (1 << (s->ps.sps->log2_ctb_size))) ||
2235              (y0 + cb_size >= s->ps.sps->height))) {
2236             int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2237             return !end_of_slice_flag;
2238         } else {
2239             return 1;
2240         }
2241     }
2242
         /* Not reached: both branches above return. */
2243     return 0;
2244 }
2245
/**
 * Set up the per-CTB neighbourhood state in the local context before a CTB
 * is parsed.
 *
 * Records the slice address of the CTB in s->tab_slice_address, updates the
 * tile extents (end_of_tiles_x / end_of_tiles_y) and first_qp_group, and
 * derives the slice/tile boundary flags as well as the ctb_*_flag neighbour
 * flags.
 *
 * @param s           decoder context
 * @param x_ctb       horizontal position of the CTB, in luma samples
 * @param y_ctb       vertical position of the CTB, in luma samples
 * @param ctb_addr_ts address of the CTB in tile-scan order
 */
2246 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2247                                  int ctb_addr_ts)
2248 {
2249     HEVCLocalContext *lc  = s->HEVClc;
2250     int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
2251     int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
         /* Raster-scan distance from the slice address to this CTB. */
2252     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2253
2254     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2255
2256     if (s->ps.pps->entropy_coding_sync_enabled_flag) {
2257         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2258             lc->first_qp_group = 1;
2259         lc->end_of_tiles_x = s->ps.sps->width;
2260     } else if (s->ps.pps->tiles_enabled_flag) {
             /* The tile id changed relative to the previous CTB in tile-scan
              * order: recompute the right edge of the tile from the width of
              * its column. */
2261         if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2262             int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2263             lc->end_of_tiles_x   = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2264             lc->first_qp_group   = 1;
2265         }
2266     } else {
2267         lc->end_of_tiles_x = s->ps.sps->width;
2268     }
2269
2270     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
2271
2272     lc->boundary_flags = 0;
2273     if (s->ps.pps->tiles_enabled_flag) {
             /* With tiles, compare tile ids and recorded slice addresses of
              * the left and upper CTBs directly. */
2274         if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2275             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2276         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2277             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2278         if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2279             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2280         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2281             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2282     } else {
             /* Without tiles, the slice boundaries follow from the distance
              * to the slice address alone. */
2283         if (ctb_addr_in_slice <= 0)
2284             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2285         if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2286             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2287     }
2288
         /* Neighbour flags: set when the neighbouring CTB lies inside the
          * picture, is far enough past the slice address to belong to the
          * slice, and is in the same tile. */
2289     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2290     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2291     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2292     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
2293 }
2294
/**
 * Decode the CTBs of the current slice segment sequentially, in tile-scan
 * order, starting at s->sh.slice_ctb_addr_rs.
 *
 * @param avctxt         codec context; its priv_data is the HEVCContext
 * @param isFilterThread not used by this function
 * @return the tile-scan address following the last decoded CTB, or a negative
 *         error code (AVERROR_INVALIDDATA for an inconsistent dependent slice
 *         segment, or the error returned by hls_coding_quadtree())
 */
2295 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2296 {
2297     HEVCContext *s  = avctxt->priv_data;
2298     int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
2299     int more_data   = 1;
2300     int x_ctb       = 0;
2301     int y_ctb       = 0;
2302     int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2303
         /* A dependent slice segment cannot start at tile-scan address 0. */
2304     if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2305         av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2306         return AVERROR_INVALIDDATA;
2307     }
2308
         /* A dependent slice segment requires that the preceding CTB (in
          * tile-scan order) was recorded with the same slice address. */
2309     if (s->sh.dependent_slice_segment_flag) {
2310         int prev_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2311         if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2312             av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2313             return AVERROR_INVALIDDATA;
2314         }
2315     }
2316
         /* NOTE(review): sps->ctb_size is used as the upper bound for
          * tile-scan addresses, presumably the number of CTBs in the
          * picture -- confirm against the SPS parser. */
2317     while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2318         int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2319
             /* Convert the raster-scan address into luma sample coordinates
              * using the picture width in CTBs (rounded up). */
2320         x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2321         y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2322         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2323
2324         ff_hevc_cabac_init(s, ctb_addr_ts);
2325
2326         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2327
             /* Store the slice-level deblocking parameters for this CTB. */
2328         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2329         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2330         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2331
2332         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2333         if (more_data < 0) {
                 /* Clear the slice address recorded for the failed CTB. */
2334             s->tab_slice_address[ctb_addr_rs] = -1;
2335             return more_data;
2336         }
2337
2338
2339         ctb_addr_ts++;
2340         ff_hevc_save_states(s, ctb_addr_ts);
2341         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2342     }
2343
         /* The last decoded CTB reaches both the right and the bottom picture
          * edge: run ff_hevc_hls_filter() on it as well. */
2344     if (x_ctb + ctb_size >= s->ps.sps->width &&
2345         y_ctb + ctb_size >= s->ps.sps->height)
2346         ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2347
2348     return ctb_addr_ts;
2349 }
2350
2351 static int hls_slice_data(HEVCContext *s)
2352 {
2353     int arg[2];
2354     int ret[2];
2355
2356     arg[0] = 0;
2357     arg[1] = 1;
2358
2359     s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
2360     return ret[0];
2361 }
/**
 * Wavefront job: decode one CTB row of the current slice.
 *
 * The job works on the context copy s1->sList[self_id]. For every row except
 * the first, the bit reader and the CABAC decoder are initialised from the
 * row's entry point (s->sh.offset / s->sh.size). Before each CTB the job
 * waits on the progress of the row through ff_thread_await_progress2(), and
 * it stops as soon as another job has raised s1->wpp_err.
 *
 * @param avctxt        codec context; its priv_data is the main HEVCContext
 * @param input_ctb_row array of int holding the CTB row index of each job
 * @param job           index of this job in input_ctb_row
 * @param self_id       index of the context copy in sList used by this job
 * @return the tile-scan address after the last CTB when the final CTB of the
 *         picture was decoded, a negative error code on failure, 0 otherwise
 */
2362 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2363 {
2364     HEVCContext *s1  = avctxt->priv_data, *s;
2365     HEVCLocalContext *lc;
2366     int ctb_size    = 1<< s1->ps.sps->log2_ctb_size;
2367     int more_data   = 1;
2368     int *ctb_row_p    = input_ctb_row;
2369     int ctb_row = ctb_row_p[job];
         /* First CTB of this row: slice start plus ctb_row full rows. */
2370     int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->ps.sps->width + ctb_size - 1) >> s1->ps.sps->log2_ctb_size);
2371     int ctb_addr_ts = s1->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2372     int thread = ctb_row % s1->threads_number;
2373     int ret;
2374
2375     s = s1->sList[self_id];
2376     lc = s->HEVClc;
2377
         /* Rows after the first start reading at their own entry point. */
2378     if(ctb_row) {
2379         ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2380
2381         if (ret < 0)
2382             return ret;
2383         ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2384     }
2385
2386     while(more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2387         int x_ctb = (ctb_addr_rs % s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2388         int y_ctb = (ctb_addr_rs / s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2389
2390         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2391
2392         ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
2393
             /* Another job failed: report progress before leaving. */
2394         if (avpriv_atomic_int_get(&s1->wpp_err)){
2395             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2396             return 0;
2397         }
2398
2399         ff_hevc_cabac_init(s, ctb_addr_ts);
2400         hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2401         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2402
2403         if (more_data < 0) {
                 /* Clear the slice address of the failed CTB and raise the
                  * shared error flag for the other jobs. */
2404             s->tab_slice_address[ctb_addr_rs] = -1;
2405             avpriv_atomic_int_set(&s1->wpp_err,  1);
2406             ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2407             return more_data;
2408         }
2409
2410         ctb_addr_ts++;
2411
2412         ff_hevc_save_states(s, ctb_addr_ts);
             /* One more CTB of this row is done. */
2413         ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2414         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2415
             /* The slice data ended before the end of the row, and this is not
              * the last entry point: treat it as an error. */
2416         if (!more_data && (x_ctb+ctb_size) < s->ps.sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2417             avpriv_atomic_int_set(&s1->wpp_err,  1);
2418             ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2419             return 0;
2420         }
2421
             /* Last CTB of the picture: run the final filter call and return
              * the address reached. */
2422         if ((x_ctb+ctb_size) >= s->ps.sps->width && (y_ctb+ctb_size) >= s->ps.sps->height ) {
2423             ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2424             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2425             return ctb_addr_ts;
2426         }
2427         ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2428         x_ctb+=ctb_size;
2429
             /* End of the row reached. */
2430         if(x_ctb >= s->ps.sps->width) {
2431             break;
2432         }
2433     }
2434     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2435
2436     return 0;
2437 }
2438
2439 static int hls_slice_data_wpp(HEVCContext *s, const HEVCNAL *nal)
2440 {
2441     const uint8_t *data = nal->data;
2442     int length          = nal->size;
2443     HEVCLocalContext *lc = s->HEVClc;
2444     int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2445     int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2446     int64_t offset;
2447     int64_t startheader, cmpt = 0;
2448     int i, j, res = 0;
2449
2450     if (!ret || !arg) {
2451         av_free(ret);
2452         av_free(arg);
2453         return AVERROR(ENOMEM);
2454     }
2455
2456     if (s->sh.slice_ctb_addr_rs + s->sh.num_entry_point_offsets * s->ps.sps->ctb_width >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
2457         av_log(s->avctx, AV_LOG_ERROR, "WPP ctb addresses are wrong (%d %d %d %d)\n",
2458             s->sh.slice_ctb_addr_rs, s->sh.num_entry_point_offsets,
2459             s->ps.sps->ctb_width, s->ps.sps->ctb_height
2460         );
2461         res = AVERROR_INVALIDDATA;
2462         goto error;
2463     }
2464
2465     ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
2466
2467     if (!s->sList[1]) {
2468         for (i = 1; i < s->threads_number; i++) {
2469             s->sList[i] = av_malloc(sizeof(HEVCContext));
2470             memcpy(s->sList[i], s, sizeof(HEVCContext));
2471             s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2472             s->sList[i]->HEVClc = s->HEVClcList[i];
2473         }
2474     }
2475
2476     offset = (lc->gb.index >> 3);
2477
2478     for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) {
2479         if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2480             startheader--;
2481             cmpt++;
2482         }
2483     }
2484
2485     for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2486         offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2487         for (j = 0, cmpt = 0, startheader = offset
2488              + s->sh.entry_point_offset[i]; j < nal->skipped_bytes; j++) {
2489             if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2490                 startheader--;
2491                 cmpt++;
2492             }
2493         }
2494         s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2495         s->sh.offset[i - 1] = offset;
2496
2497     }
2498     if (s->sh.num_entry_point_offsets != 0) {
2499         offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2500         if (length < offset) {
2501             av_log(s->avctx, AV_LOG_ERROR, "entry_point_offset table is corrupted\n");
2502             res = AVERROR_INVALIDDATA;
2503             goto error;
2504         }
2505         s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2506         s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
2507
2508     }
2509     s->data = data;
2510
2511     for (i = 1; i < s->threads_number; i++) {
2512         s->sList[i]->HEVClc->first_qp_group = 1;
2513         s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2514         memcpy(s->sList[i], s, sizeof(HEVCContext));
2515         s->sList[i]->HEVClc = s->HEVClcList[i];
2516     }
2517
2518     avpriv_atomic_int_set(&s->wpp_err, 0);
2519     ff_reset_entries(s->avctx);
2520
2521     for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2522         arg[i] = i;
2523         ret[i] = 0;
2524     }
2525
2526     if (s->ps.pps->entropy_coding_sync_enabled_flag)
2527         s->avctx->execute2(s->avctx, hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2528
2529     for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
2530         res += ret[i];
2531 error:
2532     av_free(ret);
2533     av_free(arg);
2534     return res;
2535 }
2536
2537 static int set_side_data(HEVCContext *s)
2538 {
2539     AVFrame *out = s->ref->frame;
2540
2541     if (s->sei_frame_packing_present &&
2542         s->frame_packing_arrangement_type >= 3 &&
2543         s->frame_packing_arrangement_type <= 5 &&
2544         s->content_interpretation_type > 0 &&
2545         s->content_interpretation_type < 3) {
2546         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2547         if (!stereo)
2548             return AVERROR(ENOMEM);
2549
2550         switch (s->frame_packing_arrangement_type) {
2551         case 3:
2552             if (s->quincunx_subsampling)
2553                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2554             else
2555                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2556             break;
2557         case 4:
2558             stereo->type = AV_STEREO3D_TOPBOTTOM;
2559             break;
2560         case 5:
2561             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2562             break;
2563         }
2564
2565         if (s->content_interpretation_type == 2)
2566             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2567     }
2568
2569     if (s->sei_display_orientation_present &&
2570         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2571         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2572         AVFrameSideData *rotation = av_frame_new_side_data(out,
2573                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2574                                                            sizeof(int32_t) * 9);
2575         if (!rotation)
2576             return AVERROR(ENOMEM);
2577
2578         av_display_rotation_set((int32_t *)rotation->data, angle);
2579         av_display_matrix_flip((int32_t *)rotation->data,
2580                                s->sei_hflip, s->sei_vflip);
2581     }
2582
2583     if (s->a53_caption) {
2584         AVFrameSideData* sd = av_frame_new_side_data(out,
2585                                                      AV_FRAME_DATA_A53_CC,
2586                                                      s->a53_caption_size);
2587         if (sd)
2588             memcpy(sd->data, s->a53_caption, s->a53_caption_size);
2589         av_freep(&s->a53_caption);
2590         s->a53_caption_size = 0;
2591         s->avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
2592     }
2593
2594     return 0;
2595 }
2596
/**
 * Prepare the decoder state for a new picture.
 *
 * Clears the per-picture tables, obtains a new reference frame, builds the
 * frame RPS, attaches side data, bumps/outputs frames and finally signals
 * that setup is finished (unless a hwaccel is in use).
 *
 * @param s decoder context
 * @return 0 on success; on failure the negative error code of the failing
 *         step, after the current reference has been released and s->ref
 *         reset to NULL
 */
2597 static int hevc_frame_start(HEVCContext *s)
2598 {
2599     HEVCLocalContext *lc = s->HEVClc;
         /* NOTE(review): despite its name this count is derived from
          * log2_min_cb_size; it is only used to size the memset of
          * tab_slice_address below -- confirm it matches the allocation. */
2600     int pic_size_in_ctb  = ((s->ps.sps->width  >> s->ps.sps->log2_min_cb_size) + 1) *
2601                            ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
2602     int ret;
2603
         /* Reset the per-picture tables. */
2604     memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2605     memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
2606     memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2607     memset(s->is_pcm,        0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
         /* Every byte is set to 0xff, i.e. each entry becomes -1. */
2608     memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2609
2610     s->is_decoded        = 0;
2611     s->first_nal_type    = s->nal_unit_type;
2612
         /* With tiles, the first tile ends after the first tile column. */
2613     if (s->ps.pps->tiles_enabled_flag)
2614         lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2615
2616     ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2617     if (ret < 0)
2618         goto fail;
2619
2620     ret = ff_hevc_frame_rps(s);
2621     if (ret < 0) {
2622         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2623         goto fail;
2624     }
2625
2626     s->ref->frame->key_frame = IS_IRAP(s);
2627
2628     ret = set_side_data(s);
2629     if (ret < 0)
2630         goto fail;
2631
         /* NOTE(review): relies on the numeric values of the slice type and
          * picture type enums lining up -- confirm against their
          * definitions. */
2632     s->frame->pict_type = 3 - s->sh.slice_type;
2633
2634     if (!IS_IRAP(s))
2635         ff_hevc_bump_frame(s);
2636
2637     av_frame_unref(s->output_frame);
2638     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2639     if (ret < 0)
2640         goto fail;
2641
2642     if (!s->avctx->hwaccel)
2643         ff_thread_finish_setup(s->avctx);
2644
2645     return 0;
2646
2647 fail:
         /* Drop the frame obtained above, if any. */
2648     if (s->ref)
2649         ff_hevc_unref_frame(s, s->ref, ~0);
2650     s->ref = NULL;
2651     return ret;
2652 }
2653
/**
 * Decode a single NAL unit and dispatch on its type.
 *
 * Parameter sets and SEI messages are handed to their parsers. For slice NAL
 * units the slice header is parsed, the frame is started on the first slice
 * of a picture, reference lists are built and the slice data is decoded
 * (through the hwaccel, the WPP path or the sequential path). Unknown NAL
 * types are logged and skipped.
 *
 * @param s   decoder context
 * @param nal NAL unit to decode
 * @return 0 on success. Errors that jump to the fail label are returned only
 *         when AV_EF_EXPLODE is set in err_recognition and are otherwise
 *         swallowed (0 is returned); errors from hls_slice_header(),
 *         hevc_frame_start() and the NAL type mismatch check are always
 *         returned.
 */
2654 static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
2655 {
2656     HEVCLocalContext *lc = s->HEVClc;
2657     GetBitContext *gb    = &lc->gb;
2658     int ctb_addr_ts, ret;
2659
         /* Work on a copy of the NAL's bit reader. */
2660     *gb              = nal->gb;
2661     s->nal_unit_type = nal->type;
2662     s->temporal_id   = nal->temporal_id;
2663
2664     switch (s->nal_unit_type) {
2665     case NAL_VPS:
2666         ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2667         if (ret < 0)
2668             goto fail;
2669         break;
2670     case NAL_SPS:
2671         ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2672                                      s->apply_defdispwin);
2673         if (ret < 0)
2674             goto fail;
2675         break;
2676     case NAL_PPS:
2677         ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2678         if (ret < 0)
2679             goto fail;
2680         break;
2681     case NAL_SEI_PREFIX:
2682     case NAL_SEI_SUFFIX:
2683         ret = ff_hevc_decode_nal_sei(s);
2684         if (ret < 0)
2685             goto fail;
2686         break;
         /* All slice (VCL) NAL unit types share the path below. */
2687     case NAL_TRAIL_R:
2688     case NAL_TRAIL_N:
2689     case NAL_TSA_N:
2690     case NAL_TSA_R:
2691     case NAL_STSA_N:
2692     case NAL_STSA_R:
2693     case NAL_BLA_W_LP:
2694     case NAL_BLA_W_RADL:
2695     case NAL_BLA_N_LP:
2696     case NAL_IDR_W_RADL:
2697     case NAL_IDR_N_LP:
2698     case NAL_CRA_NUT:
2699     case NAL_RADL_N:
2700     case NAL_RADL_R:
2701     case NAL_RASL_N:
2702     case NAL_RASL_R:
2703         ret = hls_slice_header(s);
2704         if (ret < 0)
2705             return ret;
2706
             /* max_ra == INT_MAX marks that no limit has been chosen yet:
              * a CRA or BLA picture sets it to its POC, an IDR picture to
              * INT_MIN. */
2707         if (s->max_ra == INT_MAX) {
2708             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2709                 s->max_ra = s->poc;
2710             } else {
2711                 if (IS_IDR(s))
2712                     s->max_ra = INT_MIN;
2713             }
2714         }
2715
             /* RASL pictures with a POC not above max_ra are not decoded. */
2716         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2717             s->poc <= s->max_ra) {
2718             s->is_decoded = 0;
2719             break;
2720         } else {
2721             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2722                 s->max_ra = INT_MIN;
2723         }
2724
2725         if (s->sh.first_slice_in_pic_flag) {
2726             ret = hevc_frame_start(s);
2727             if (ret < 0)
2728                 return ret;
2729         } else if (!s->ref) {
                 /* NOTE(review): ret still holds the non-negative result of
                  * hls_slice_header() here, so even with AV_EF_EXPLODE this
                  * path does not return an error code. */
2730             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2731             goto fail;
2732         }
2733
             /* Every slice NAL of a picture must have the type of the first
              * one. */
2734         if (s->nal_unit_type != s->first_nal_type) {
2735             av_log(s->avctx, AV_LOG_ERROR,
2736                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2737                    s->first_nal_type, s->nal_unit_type);
2738             return AVERROR_INVALIDDATA;
2739         }
2740
             /* Reference picture lists are built for independent non-I
              * slice segments only. */
2741         if (!s->sh.dependent_slice_segment_flag &&
2742             s->sh.slice_type != I_SLICE) {
2743             ret = ff_hevc_slice_rpl(s);
2744             if (ret < 0) {
2745                 av_log(s->avctx, AV_LOG_WARNING,
2746                        "Error constructing the reference lists for the current slice.\n");
2747                 goto fail;
2748             }
2749         }
2750
2751         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2752             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2753             if (ret < 0)
2754                 goto fail;
2755         }
2756
2757         if (s->avctx->hwaccel) {
2758             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
2759             if (ret < 0)
2760                 goto fail;
2761         } else {
                 /* The WPP path needs several threads and at least one entry
                  * point in the slice. */
2762             if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2763                 ctb_addr_ts = hls_slice_data_wpp(s, nal);
2764             else
2765                 ctb_addr_ts = hls_slice_data(s);
                 /* The picture is complete once the returned address reaches
                  * the number of CTBs in the picture. */
2766             if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
2767                 s->is_decoded = 1;
2768             }
2769
2770             if (ctb_addr_ts < 0) {
2771                 ret = ctb_addr_ts;
2772                 goto fail;
2773             }
2774         }
2775         break;
2776     case NAL_EOS_NUT:
2777     case NAL_EOB_NUT:
             /* Advance the 8-bit sequence counter and reset max_ra to its
              * "not chosen yet" value. */
2778         s->seq_decode = (s->seq_decode + 1) & 0xff;
2779         s->max_ra     = INT_MAX;
2780         break;
2781     case NAL_AUD:
2782     case NAL_FD_NUT:
2783         break;
2784     default:
2785         av_log(s->avctx, AV_LOG_INFO,
2786                "Skipping NAL unit %d\n", s->nal_unit_type);
2787     }
2788
2789     return 0;
2790 fail:
         /* Errors are fatal only when the user asked for AV_EF_EXPLODE. */
2791     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2792         return ret;
2793     return 0;
2794 }
2795
2796 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2797 {
2798     int i, ret = 0;
2799
2800     s->ref = NULL;
2801     s->last_eos = s->eos;
2802     s->eos = 0;
2803
2804     /* split the input packet into NAL units, so we know the upper bound on the
2805      * number of slices in the frame */
2806     ret = ff_hevc_split_packet(s, &s->pkt, buf, length, s->avctx, s->is_nalff,
2807                                s->nal_length_size);
2808     if (ret < 0) {
2809         av_log(s->avctx, AV_LOG_ERROR,
2810                "Error splitting the input into NAL units.\n");
2811         return ret;
2812     }
2813
2814     for (i = 0; i < s->pkt.nb_nals; i++) {
2815         if (s->pkt.nals[i].type == NAL_EOB_NUT ||
2816             s->pkt.nals[i].type == NAL_EOS_NUT)
2817             s->eos = 1;
2818     }
2819
2820     /* decode the NAL units */
2821     for (i = 0; i < s->pkt.nb_nals; i++) {
2822         ret = decode_nal_unit(s, &s->pkt.nals[i]);
2823         if (ret < 0) {
2824             av_log(s->avctx, AV_LOG_WARNING,
2825                    "Error parsing NAL unit #%d.\n", i);
2826             goto fail;
2827         }
2828     }
2829
2830 fail:
2831     if (s->ref && s->threads_type == FF_THREAD_FRAME)
2832         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
2833
2834     return ret;
2835 }
2836
/**
 * Log a 16-byte MD5 digest as two hex digits per byte, using one av_log()
 * call per byte and no trailing separator or newline.
 */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *cur = md5;
    const uint8_t *end = md5 + 16;

    while (cur < end)
        av_log(log_ctx, level, "%02"PRIx8, *cur++);
}
2843
2844 static int verify_md5(HEVCContext *s, AVFrame *frame)
2845 {
2846     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2847     int pixel_shift;
2848     int i, j;
2849
2850     if (!desc)
2851         return AVERROR(EINVAL);
2852
2853     pixel_shift = desc->comp[0].depth > 8;
2854
2855     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2856            s->poc);
2857
2858     /* the checksums are LE, so we have to byteswap for >8bpp formats
2859      * on BE arches */
2860 #if HAVE_BIGENDIAN
2861     if (pixel_shift && !s->checksum_buf) {
2862         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2863                        FFMAX3(frame->linesize[0], frame->linesize[1],
2864                               frame->linesize[2]));
2865         if (!s->checksum_buf)
2866             return AVERROR(ENOMEM);
2867     }
2868 #endif
2869
2870     for (i = 0; frame->data[i]; i++) {
2871         int width  = s->avctx->coded_width;
2872         int height = s->avctx->coded_height;
2873         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
2874         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2875         uint8_t md5[16];
2876
2877         av_md5_init(s->md5_ctx);
2878         for (j = 0; j < h; j++) {
2879             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2880 #if HAVE_BIGENDIAN
2881             if (pixel_shift) {
2882                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2883                                     (const uint16_t *) src, w);
2884                 src = s->checksum_buf;
2885             }
2886 #endif
2887             av_md5_update(s->md5_ctx, src, w << pixel_shift);
2888         }
2889         av_md5_final(s->md5_ctx, md5);
2890
2891         if (!memcmp(md5, s->md5[i], 16)) {
2892             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2893             print_md5(s->avctx, AV_LOG_DEBUG, md5);
2894             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
2895         } else {
2896             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2897             print_md5(s->avctx, AV_LOG_ERROR, md5);
2898             av_log   (s->avctx, AV_LOG_ERROR, " != ");
2899             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2900             av_log   (s->avctx, AV_LOG_ERROR, "\n");
2901             return AVERROR_INVALIDDATA;
2902         }
2903     }
2904
2905     av_log(s->avctx, AV_LOG_DEBUG, "\n");
2906
2907     return 0;
2908 }
2909
2910 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2911                              AVPacket *avpkt)
2912 {
2913     int ret;
2914     HEVCContext *s = avctx->priv_data;
2915
2916     if (!avpkt->size) {
2917         ret = ff_hevc_output_frame(s, data, 1);
2918         if (ret < 0)
2919             return ret;
2920
2921         *got_output = ret;
2922         return 0;
2923     }
2924
2925     s->ref = NULL;
2926     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
2927     if (ret < 0)
2928         return ret;
2929
2930     if (avctx->hwaccel) {
2931         if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) {
2932             av_log(avctx, AV_LOG_ERROR,
2933                    "hardware accelerator failed to decode picture\n");
2934             ff_hevc_unref_frame(s, s->ref, ~0);
2935             return ret;
2936         }
2937     } else {
2938         /* verify the SEI checksum */
2939         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2940             s->is_md5) {
2941             ret = verify_md5(s, s->ref->frame);
2942             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2943                 ff_hevc_unref_frame(s, s->ref, ~0);
2944                 return ret;
2945             }
2946         }
2947     }
2948     s->is_md5 = 0;
2949
2950     if (s->is_decoded) {
2951         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2952         s->is_decoded = 0;
2953     }
2954
2955     if (s->output_frame->buf[0]) {
2956         av_frame_move_ref(data, s->output_frame);
2957         *got_output = 1;
2958     }
2959
2960     return avpkt->size;
2961 }
2962
2963 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2964 {
2965     int ret;
2966
2967     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2968     if (ret < 0)
2969         return ret;
2970
2971     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2972     if (!dst->tab_mvf_buf)
2973         goto fail;
2974     dst->tab_mvf = src->tab_mvf;
2975
2976     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2977     if (!dst->rpl_tab_buf)
2978         goto fail;
2979     dst->rpl_tab = src->rpl_tab;
2980
2981     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2982     if (!dst->rpl_buf)
2983         goto fail;
2984
2985     dst->poc        = src->poc;
2986     dst->ctb_count  = src->ctb_count;
2987     dst->window     = src->window;
2988     dst->flags      = src->flags;
2989     dst->sequence   = src->sequence;
2990
2991     if (src->hwaccel_picture_private) {
2992         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
2993         if (!dst->hwaccel_priv_buf)
2994             goto fail;
2995         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
2996     }
2997
2998     return 0;
2999 fail:
3000     ff_hevc_unref_frame(s, dst, ~0);
3001     return AVERROR(ENOMEM);
3002 }
3003
3004 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3005 {
3006     HEVCContext       *s = avctx->priv_data;
3007     int i;
3008
3009     pic_arrays_free(s);
3010
3011     av_freep(&s->md5_ctx);
3012
3013     av_freep(&s->cabac_state);
3014
3015     for (i = 0; i < 3; i++) {
3016         av_freep(&s->sao_pixel_buffer_h[i]);
3017         av_freep(&s->sao_pixel_buffer_v[i]);
3018     }
3019     av_frame_free(&s->output_frame);
3020
3021     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3022         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3023         av_frame_free(&s->DPB[i].frame);
3024     }
3025
3026     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++)
3027         av_buffer_unref(&s->ps.vps_list[i]);
3028     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++)
3029         av_buffer_unref(&s->ps.sps_list[i]);
3030     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++)
3031         av_buffer_unref(&s->ps.pps_list[i]);
3032     s->ps.sps = NULL;
3033     s->ps.pps = NULL;
3034     s->ps.vps = NULL;
3035
3036     av_freep(&s->sh.entry_point_offset);
3037     av_freep(&s->sh.offset);
3038     av_freep(&s->sh.size);
3039
3040     for (i = 1; i < s->threads_number; i++) {
3041         HEVCLocalContext *lc = s->HEVClcList[i];
3042         if (lc) {
3043             av_freep(&s->HEVClcList[i]);
3044             av_freep(&s->sList[i]);
3045         }
3046     }
3047     if (s->HEVClc == s->HEVClcList[0])
3048         s->HEVClc = NULL;
3049     av_freep(&s->HEVClcList[0]);
3050
3051     for (i = 0; i < s->pkt.nals_allocated; i++) {
3052         av_freep(&s->pkt.nals[i].rbsp_buffer);
3053         av_freep(&s->pkt.nals[i].skipped_bytes_pos);
3054     }
3055     av_freep(&s->pkt.nals);
3056     s->pkt.nals_allocated = 0;
3057
3058     return 0;
3059 }
3060
3061 static av_cold int hevc_init_context(AVCodecContext *avctx)
3062 {
3063     HEVCContext *s = avctx->priv_data;
3064     int i;
3065
3066     s->avctx = avctx;
3067
3068     s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3069     if (!s->HEVClc)
3070         goto fail;
3071     s->HEVClcList[0] = s->HEVClc;
3072     s->sList[0] = s;
3073
3074     s->cabac_state = av_malloc(HEVC_CONTEXTS);
3075     if (!s->cabac_state)
3076         goto fail;
3077
3078     s->output_frame = av_frame_alloc();
3079     if (!s->output_frame)
3080         goto fail;
3081
3082     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3083         s->DPB[i].frame = av_frame_alloc();
3084         if (!s->DPB[i].frame)
3085             goto fail;
3086         s->DPB[i].tf.f = s->DPB[i].frame;
3087     }
3088
3089     s->max_ra = INT_MAX;
3090
3091     s->md5_ctx = av_md5_alloc();
3092     if (!s->md5_ctx)
3093         goto fail;
3094
3095     ff_bswapdsp_init(&s->bdsp);
3096
3097     s->context_initialized = 1;
3098     s->eos = 0;
3099
3100     return 0;
3101
3102 fail:
3103     hevc_decode_free(avctx);
3104     return AVERROR(ENOMEM);
3105 }
3106
3107 static int hevc_update_thread_context(AVCodecContext *dst,
3108                                       const AVCodecContext *src)
3109 {
3110     HEVCContext *s  = dst->priv_data;
3111     HEVCContext *s0 = src->priv_data;
3112     int i, ret;
3113
3114     if (!s->context_initialized) {
3115         ret = hevc_init_context(dst);
3116         if (ret < 0)
3117             return ret;
3118     }
3119
3120     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3121         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3122         if (s0->DPB[i].frame->buf[0]) {
3123             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3124             if (ret < 0)
3125                 return ret;
3126         }
3127     }
3128
3129     if (s->ps.sps != s0->ps.sps)
3130         s->ps.sps = NULL;
3131     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
3132         av_buffer_unref(&s->ps.vps_list[i]);
3133         if (s0->ps.vps_list[i]) {
3134             s->ps.vps_list[i] = av_buffer_ref(s0->ps.vps_list[i]);
3135             if (!s->ps.vps_list[i])
3136                 return AVERROR(ENOMEM);
3137         }
3138     }
3139
3140     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3141         av_buffer_unref(&s->ps.sps_list[i]);
3142         if (s0->ps.sps_list[i]) {
3143             s->ps.sps_list[i] = av_buffer_ref(s0->ps.sps_list[i]);
3144             if (!s->ps.sps_list[i])
3145                 return AVERROR(ENOMEM);
3146         }
3147     }
3148
3149     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
3150         av_buffer_unref(&s->ps.pps_list[i]);
3151         if (s0->ps.pps_list[i]) {
3152             s->ps.pps_list[i] = av_buffer_ref(s0->ps.pps_list[i]);
3153             if (!s->ps.pps_list[i])
3154                 return AVERROR(ENOMEM);
3155         }
3156     }
3157
3158     if (s->ps.sps != s0->ps.sps)
3159         if ((ret = set_sps(s, s0->ps.sps, src->pix_fmt)) < 0)
3160             return ret;
3161
3162     s->seq_decode = s0->seq_decode;
3163     s->seq_output = s0->seq_output;
3164     s->pocTid0    = s0->pocTid0;
3165     s->max_ra     = s0->max_ra;
3166     s->eos        = s0->eos;
3167     s->no_rasl_output_flag = s0->no_rasl_output_flag;
3168
3169     s->is_nalff        = s0->is_nalff;
3170     s->nal_length_size = s0->nal_length_size;
3171
3172     s->threads_number      = s0->threads_number;
3173     s->threads_type        = s0->threads_type;
3174
3175     if (s0->eos) {
3176         s->seq_decode = (s->seq_decode + 1) & 0xff;
3177         s->max_ra = INT_MAX;
3178     }
3179
3180     return 0;
3181 }
3182
3183 static int hevc_decode_extradata(HEVCContext *s)
3184 {
3185     AVCodecContext *avctx = s->avctx;
3186     GetByteContext gb;
3187     int ret, i;
3188
3189     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3190
3191     if (avctx->extradata_size > 3 &&
3192         (avctx->extradata[0] || avctx->extradata[1] ||
3193          avctx->extradata[2] > 1)) {
3194         /* It seems the extradata is encoded as hvcC format.
3195          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3196          * is finalized. When finalized, configurationVersion will be 1 and we
3197          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3198         int i, j, num_arrays, nal_len_size;
3199
3200         s->is_nalff = 1;
3201
3202         bytestream2_skip(&gb, 21);
3203         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3204         num_arrays   = bytestream2_get_byte(&gb);
3205
3206         /* nal units in the hvcC always have length coded with 2 bytes,
3207          * so put a fake nal_length_size = 2 while parsing them */
3208         s->nal_length_size = 2;
3209
3210         /* Decode nal units from hvcC. */
3211         for (i = 0; i < num_arrays; i++) {
3212             int type = bytestream2_get_byte(&gb) & 0x3f;
3213             int cnt  = bytestream2_get_be16(&gb);
3214
3215             for (j = 0; j < cnt; j++) {
3216                 // +2 for the nal size field
3217                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3218                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3219                     av_log(s->avctx, AV_LOG_ERROR,
3220                            "Invalid NAL unit size in extradata.\n");
3221                     return AVERROR_INVALIDDATA;
3222                 }
3223
3224                 ret = decode_nal_units(s, gb.buffer, nalsize);
3225                 if (ret < 0) {
3226                     av_log(avctx, AV_LOG_ERROR,
3227                            "Decoding nal unit %d %d from hvcC failed\n",
3228                            type, i);
3229                     return ret;
3230                 }
3231                 bytestream2_skip(&gb, nalsize);
3232             }
3233         }
3234
3235         /* Now store right nal length size, that will be used to parse
3236          * all other nals */
3237         s->nal_length_size = nal_len_size;
3238     } else {
3239         s->is_nalff = 0;
3240         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3241         if (ret < 0)
3242             return ret;
3243     }
3244
3245     /* export stream parameters from the first SPS */
3246     for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3247         if (s->ps.sps_list[i]) {
3248             const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
3249             export_stream_params(s->avctx, &s->ps, sps);
3250             break;
3251         }
3252     }
3253
3254     return 0;
3255 }
3256
3257 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3258 {
3259     HEVCContext *s = avctx->priv_data;
3260     int ret;
3261
3262     avctx->internal->allocate_progress = 1;
3263
3264     ret = hevc_init_context(avctx);
3265     if (ret < 0)
3266         return ret;
3267
3268     s->enable_parallel_tiles = 0;
3269     s->picture_struct = 0;
3270     s->eos = 1;
3271
3272     if(avctx->active_thread_type & FF_THREAD_SLICE)
3273         s->threads_number = avctx->thread_count;
3274     else
3275         s->threads_number = 1;
3276
3277     if (avctx->extradata_size > 0 && avctx->extradata) {
3278         ret = hevc_decode_extradata(s);
3279         if (ret < 0) {
3280             hevc_decode_free(avctx);
3281             return ret;
3282         }
3283     }
3284
3285     if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3286             s->threads_type = FF_THREAD_FRAME;
3287         else
3288             s->threads_type = FF_THREAD_SLICE;
3289
3290     return 0;
3291 }
3292
3293 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3294 {
3295     HEVCContext *s = avctx->priv_data;
3296     int ret;
3297
3298     memset(s, 0, sizeof(*s));
3299
3300     ret = hevc_init_context(avctx);
3301     if (ret < 0)
3302         return ret;
3303
3304     return 0;
3305 }
3306
3307 static void hevc_decode_flush(AVCodecContext *avctx)
3308 {
3309     HEVCContext *s = avctx->priv_data;
3310     ff_hevc_flush_dpb(s);
3311     s->max_ra = INT_MAX;
3312     s->eos = 1;
3313 }
3314
3315 #define OFFSET(x) offsetof(HEVCContext, x)
3316 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3317
3318 static const AVOption options[] = {
3319     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3320         AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3321     { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3322         AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3323     { NULL },
3324 };
3325
3326 static const AVClass hevc_decoder_class = {
3327     .class_name = "HEVC decoder",
3328     .item_name  = av_default_item_name,
3329     .option     = options,
3330     .version    = LIBAVUTIL_VERSION_INT,
3331 };
3332
3333 AVCodec ff_hevc_decoder = {
3334     .name                  = "hevc",
3335     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3336     .type                  = AVMEDIA_TYPE_VIDEO,
3337     .id                    = AV_CODEC_ID_HEVC,
3338     .priv_data_size        = sizeof(HEVCContext),
3339     .priv_class            = &hevc_decoder_class,
3340     .init                  = hevc_decode_init,
3341     .close                 = hevc_decode_free,
3342     .decode                = hevc_decode_frame,
3343     .flush                 = hevc_decode_flush,
3344     .update_thread_context = hevc_update_thread_context,
3345     .init_thread_copy      = hevc_init_thread_copy,
3346     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
3347                              AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
3348     .profiles              = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
3349 };