4 * Copyright (C) 2012 - 2013 Guillaume Martres
5 * Copyright (C) 2012 - 2013 Mickael Raulet
6 * Copyright (C) 2012 - 2013 Gildas Cocherel
7 * Copyright (C) 2012 - 2013 Wassim Hamidouche
9 * This file is part of FFmpeg.
11 * FFmpeg is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
16 * FFmpeg is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with FFmpeg; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/atomic.h"
27 #include "libavutil/attributes.h"
28 #include "libavutil/common.h"
29 #include "libavutil/display.h"
30 #include "libavutil/internal.h"
31 #include "libavutil/md5.h"
32 #include "libavutil/opt.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/stereo3d.h"
37 #include "bytestream.h"
38 #include "cabac_functions.h"
/* Maps a prediction-unit width (2..64 samples) to the index used by the
 * weighted/qpel function tables; only the listed widths are valid in HEVC. */
const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
 * NOTE: Each function hls_foo corresponds to the function foo in the
 * specification (HLS stands for High Level Syntax).
/* free everything allocated by pic_arrays_init() */
static void pic_arrays_free(HEVCContext *s)
    av_freep(&s->deblock);
    av_freep(&s->skip_flag);
    av_freep(&s->tab_ct_depth);
    av_freep(&s->tab_ipm);
    av_freep(&s->cbf_luma);
    av_freep(&s->qp_y_tab);
    av_freep(&s->tab_slice_address);
    av_freep(&s->filter_slice_edges);
    /* deblocking-filter boundary-strength maps */
    av_freep(&s->horizontal_bs);
    av_freep(&s->vertical_bs);
    /* per-slice-header allocations (tile/WPP entry points) */
    av_freep(&s->sh.entry_point_offset);
    av_freep(&s->sh.size);
    av_freep(&s->sh.offset);
    /* pools are sized from frame dimensions, so they are torn down here too */
    av_buffer_pool_uninit(&s->tab_mvf_pool);
    av_buffer_pool_uninit(&s->rpl_tab_pool);
/* allocate arrays that depend on frame dimensions */
static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
    int log2_min_cb_size = sps->log2_min_cb_size;
    int width = sps->width;
    int height = sps->height;
    /* +1 in each dimension so edge CTBs can be addressed without clamping */
    int pic_size_in_ctb = ((width >> log2_min_cb_size) + 1) *
                          ((height >> log2_min_cb_size) + 1);
    int ctb_count = sps->ctb_width * sps->ctb_height;
    int min_pu_size = sps->min_pu_width * sps->min_pu_height;
    /* boundary-strength grids work on a 4x4-sample raster */
    s->bs_width = (width >> 2) + 1;
    s->bs_height = (height >> 2) + 1;
    s->sao = av_mallocz_array(ctb_count, sizeof(*s->sao));
    s->deblock = av_mallocz_array(ctb_count, sizeof(*s->deblock));
    if (!s->sao || !s->deblock)
    s->skip_flag = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
    s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
    if (!s->skip_flag || !s->tab_ct_depth)
    s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
    s->tab_ipm = av_mallocz(min_pu_size);
    s->is_pcm = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
    if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
    s->filter_slice_edges = av_mallocz(ctb_count);
    s->tab_slice_address = av_malloc_array(pic_size_in_ctb,
                                           sizeof(*s->tab_slice_address));
    s->qp_y_tab = av_malloc_array(pic_size_in_ctb,
                                  sizeof(*s->qp_y_tab));
    if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
    s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
    s->vertical_bs = av_mallocz_array(s->bs_width, s->bs_height);
    if (!s->horizontal_bs || !s->vertical_bs)
    /* pooled buffers are re-used across frames of the same dimensions */
    s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
    s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
    if (!s->tab_mvf_pool || !s->rpl_tab_pool)
    /* failure path: everything already allocated is released by the caller
     * via pic_arrays_free() */
    return AVERROR(ENOMEM);
/* Parse the prediction weight table (spec 7.3.6.3) used for weighted
 * (bi-)prediction into s->sh.  Absent entries get the spec defaults:
 * weight = 1 << denom, offset = 0. */
static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
    uint8_t luma_weight_l0_flag[16];
    uint8_t chroma_weight_l0_flag[16];
    uint8_t luma_weight_l1_flag[16];
    uint8_t chroma_weight_l1_flag[16];
    int luma_log2_weight_denom;
    luma_log2_weight_denom = get_ue_golomb_long(gb);
    if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7)
        av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
    /* clamp into the legal 0..7 range even after logging the error */
    s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
    if (s->ps.sps->chroma_format_idc != 0) {
        int delta = get_se_golomb(gb);
        s->sh.chroma_log2_weight_denom = av_clip_uintp2(s->sh.luma_log2_weight_denom + delta, 3);
    /* ---- list L0 ---- */
    for (i = 0; i < s->sh.nb_refs[L0]; i++) {
        luma_weight_l0_flag[i] = get_bits1(gb);
        if (!luma_weight_l0_flag[i]) {
            s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
            s->sh.luma_offset_l0[i] = 0;
    if (s->ps.sps->chroma_format_idc != 0) {
        for (i = 0; i < s->sh.nb_refs[L0]; i++)
            chroma_weight_l0_flag[i] = get_bits1(gb);
        for (i = 0; i < s->sh.nb_refs[L0]; i++)
            chroma_weight_l0_flag[i] = 0;
    for (i = 0; i < s->sh.nb_refs[L0]; i++) {
        if (luma_weight_l0_flag[i]) {
            int delta_luma_weight_l0 = get_se_golomb(gb);
            s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
            s->sh.luma_offset_l0[i] = get_se_golomb(gb);
        if (chroma_weight_l0_flag[i]) {
            for (j = 0; j < 2; j++) {
                int delta_chroma_weight_l0 = get_se_golomb(gb);
                int delta_chroma_offset_l0 = get_se_golomb(gb);
                s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
                /* chroma offset reconstruction per the spec's derivation,
                 * clipped to the 8-bit offset range */
                s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
                                                                                 >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
            s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
            s->sh.chroma_offset_l0[i][0] = 0;
            s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
            s->sh.chroma_offset_l0[i][1] = 0;
    /* ---- list L1: only coded for B slices, parsed like L0 ---- */
    if (s->sh.slice_type == B_SLICE) {
        for (i = 0; i < s->sh.nb_refs[L1]; i++) {
            luma_weight_l1_flag[i] = get_bits1(gb);
            if (!luma_weight_l1_flag[i]) {
                s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
                s->sh.luma_offset_l1[i] = 0;
        if (s->ps.sps->chroma_format_idc != 0) {
            for (i = 0; i < s->sh.nb_refs[L1]; i++)
                chroma_weight_l1_flag[i] = get_bits1(gb);
            for (i = 0; i < s->sh.nb_refs[L1]; i++)
                chroma_weight_l1_flag[i] = 0;
        for (i = 0; i < s->sh.nb_refs[L1]; i++) {
            if (luma_weight_l1_flag[i]) {
                int delta_luma_weight_l1 = get_se_golomb(gb);
                s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
                s->sh.luma_offset_l1[i] = get_se_golomb(gb);
            if (chroma_weight_l1_flag[i]) {
                for (j = 0; j < 2; j++) {
                    int delta_chroma_weight_l1 = get_se_golomb(gb);
                    int delta_chroma_offset_l1 = get_se_golomb(gb);
                    s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
                    s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
                s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
                s->sh.chroma_offset_l1[i][0] = 0;
                s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
                s->sh.chroma_offset_l1[i][1] = 0;
/* Parse the long-term reference picture set of the slice header into *rps.
 * The first nb_sps entries come from the SPS LT-RPS list (by index), the
 * remaining nb_sh entries are coded explicitly.  Returns 0 or AVERROR. */
static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
    const HEVCSPS *sps = s->ps.sps;
    int max_poc_lsb = 1 << sps->log2_max_poc_lsb;
    int prev_delta_msb = 0;
    unsigned int nb_sps = 0, nb_sh;
    if (!sps->long_term_ref_pics_present_flag)
    if (sps->num_long_term_ref_pics_sps > 0)
        nb_sps = get_ue_golomb_long(gb);
    nb_sh = get_ue_golomb_long(gb);
    /* 64-bit addition so nb_sh + nb_sps cannot wrap before the bound check */
    if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
        return AVERROR_INVALIDDATA;
    rps->nb_refs = nb_sh + nb_sps;
    for (i = 0; i < rps->nb_refs; i++) {
        uint8_t delta_poc_msb_present;
        uint8_t lt_idx_sps = 0;
        /* SPS-indexed entry: copy POC LSB and usage flag from the SPS */
        if (sps->num_long_term_ref_pics_sps > 1)
            lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
        rps->poc[i] = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
        rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
        /* explicitly coded entry */
        rps->poc[i] = get_bits(gb, sps->log2_max_poc_lsb);
        rps->used[i] = get_bits1(gb);
        delta_poc_msb_present = get_bits1(gb);
        if (delta_poc_msb_present) {
            int delta = get_ue_golomb_long(gb);
            /* delta is coded differentially, except for the first entry of
             * each of the two sub-lists (i == 0 and i == nb_sps) */
            if (i && i != nb_sps)
                delta += prev_delta_msb;
            /* reconstruct the full POC from the current picture's POC */
            rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
            prev_delta_msb = delta;
/* Propagate SPS/VPS-derived stream properties (dimensions, profile/level,
 * color description, frame rate) to the AVCodecContext. */
static void export_stream_params(AVCodecContext *avctx, const HEVCParamSets *ps,
    const HEVCVPS *vps = (const HEVCVPS*)ps->vps_list[sps->vps_id]->data;
    unsigned int num = 0, den = 0;
    avctx->pix_fmt = sps->pix_fmt;
    avctx->coded_width = sps->width;
    avctx->coded_height = sps->height;
    /* output size excludes the conformance-window cropping */
    avctx->width = sps->output_width;
    avctx->height = sps->output_height;
    avctx->has_b_frames = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
    avctx->profile = sps->ptl.general_ptl.profile_idc;
    avctx->level = sps->ptl.general_ptl.level_idc;
    ff_set_sar(avctx, sps->vui.sar);
    if (sps->vui.video_signal_type_present_flag)
        avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
        avctx->color_range = AVCOL_RANGE_MPEG;
    if (sps->vui.colour_description_present_flag) {
        avctx->color_primaries = sps->vui.colour_primaries;
        avctx->color_trc = sps->vui.transfer_characteristic;
        avctx->colorspace = sps->vui.matrix_coeffs;
        avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
        avctx->color_trc = AVCOL_TRC_UNSPECIFIED;
        avctx->colorspace = AVCOL_SPC_UNSPECIFIED;
    /* VPS timing info takes precedence over the VUI's */
    if (vps->vps_timing_info_present_flag) {
        num = vps->vps_num_units_in_tick;
        den = vps->vps_time_scale;
    } else if (sps->vui.vui_timing_info_present_flag) {
        num = sps->vui.vui_num_units_in_tick;
        den = sps->vui.vui_time_scale;
    /* note the den/num swap: units-per-tick over time-scale -> frame rate */
    if (num != 0 && den != 0)
        av_reduce(&avctx->framerate.den, &avctx->framerate.num,
/* Activate *sps: (re)allocate the per-frame arrays, negotiate the output
 * pixel format (offering hwaccel formats where configured), and
 * (re)initialize the DSP contexts and SAO line buffers. */
static int set_sps(HEVCContext *s, const HEVCSPS *sps, enum AVPixelFormat pix_fmt)
#define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL + CONFIG_HEVC_VAAPI_HWACCEL + CONFIG_HEVC_VDPAU_HWACCEL)
    enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
    ret = pic_arrays_init(s, sps);
    export_stream_params(s->avctx, &s->ps, sps);
    /* the compiled-in hwaccels only handle 8-bit 4:2:0 content */
    if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
#if CONFIG_HEVC_DXVA2_HWACCEL
        *fmt++ = AV_PIX_FMT_DXVA2_VLD;
#if CONFIG_HEVC_D3D11VA_HWACCEL
        *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
#if CONFIG_HEVC_VAAPI_HWACCEL
        *fmt++ = AV_PIX_FMT_VAAPI;
#if CONFIG_HEVC_VDPAU_HWACCEL
        *fmt++ = AV_PIX_FMT_VDPAU;
    if (pix_fmt == AV_PIX_FMT_NONE) {
        /* software format goes last; list is NONE-terminated */
        *fmt++ = sps->pix_fmt;
        *fmt = AV_PIX_FMT_NONE;
        ret = ff_thread_get_format(s->avctx, pix_fmts);
        s->avctx->pix_fmt = ret;
        /* caller forced a specific format */
        s->avctx->pix_fmt = pix_fmt;
    ff_hevc_pred_init(&s->hpc, sps->bit_depth);
    ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
    ff_videodsp_init (&s->vdsp, sps->bit_depth);
    for (i = 0; i < 3; i++) {
        av_freep(&s->sao_pixel_buffer_h[i]);
        av_freep(&s->sao_pixel_buffer_v[i]);
    if (sps->sao_enabled && !s->avctx->hwaccel) {
        int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
        /* per-plane line buffers holding pre-deblock samples for SAO */
        for(c_idx = 0; c_idx < c_count; c_idx++) {
            int w = sps->width >> sps->hshift[c_idx];
            int h = sps->height >> sps->vshift[c_idx];
            s->sao_pixel_buffer_h[c_idx] =
                av_malloc((w * 2 * sps->ctb_height) <<
            s->sao_pixel_buffer_v[c_idx] =
                av_malloc((h * 2 * sps->ctb_width) <<
    s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
/* Parse a slice segment header (spec 7.3.6) into s->sh, activating the
 * PPS/SPS it references and validating its fields.
 * Returns 0 on success or a negative AVERROR code. */
static int hls_slice_header(HEVCContext *s)
    GetBitContext *gb = &s->HEVClc->gb;
    SliceHeader *sh = &s->sh;
    sh->first_slice_in_pic_flag = get_bits1(gb);
    /* an IDR/BLA picture starts a new coded video sequence */
    if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
        s->seq_decode = (s->seq_decode + 1) & 0xff;
        ff_hevc_clear_refs(s);
    sh->no_output_of_prior_pics_flag = 0;
        sh->no_output_of_prior_pics_flag = get_bits1(gb);
    sh->pps_id = get_ue_golomb_long(gb);
    if (sh->pps_id >= MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
        av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
        return AVERROR_INVALIDDATA;
    /* all slice segments of one picture must reference the same PPS */
    if (!sh->first_slice_in_pic_flag &&
        s->ps.pps != (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data) {
        av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
        return AVERROR_INVALIDDATA;
    s->ps.pps = (HEVCPPS*)s->ps.pps_list[sh->pps_id]->data;
    /* a CRA immediately after end-of-sequence behaves like a BLA */
    if (s->nal_unit_type == NAL_CRA_NUT && s->last_eos == 1)
        sh->no_output_of_prior_pics_flag = 1;
    if (s->ps.sps != (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {
        const HEVCSPS* last_sps = s->ps.sps;
        s->ps.sps = (HEVCSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
        if (last_sps && IS_IRAP(s) && s->nal_unit_type != NAL_CRA_NUT) {
            /* dimensions or DPB size changed: prior pictures may be output */
            if (s->ps.sps->width != last_sps->width || s->ps.sps->height != last_sps->height ||
                s->ps.sps->temporal_layer[s->ps.sps->max_sub_layers - 1].max_dec_pic_buffering !=
                last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
                sh->no_output_of_prior_pics_flag = 0;
        ff_hevc_clear_refs(s);
        /* reinitialize per-SPS state (pic arrays, pixel format, DSP) */
        ret = set_sps(s, s->ps.sps, AV_PIX_FMT_NONE);
        s->seq_decode = (s->seq_decode + 1) & 0xff;
    sh->dependent_slice_segment_flag = 0;
    if (!sh->first_slice_in_pic_flag) {
        int slice_address_length;
        if (s->ps.pps->dependent_slice_segments_enabled_flag)
            sh->dependent_slice_segment_flag = get_bits1(gb);
        /* address is coded with just enough bits for the CTB count */
        slice_address_length = av_ceil_log2(s->ps.sps->ctb_width *
                                            s->ps.sps->ctb_height);
        sh->slice_segment_addr = slice_address_length ? get_bits(gb, slice_address_length) : 0;
        if (sh->slice_segment_addr >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid slice segment address: %u.\n",
                   sh->slice_segment_addr);
            return AVERROR_INVALIDDATA;
        if (!sh->dependent_slice_segment_flag) {
            sh->slice_addr = sh->slice_segment_addr;
        /* first slice segment of the picture */
        sh->slice_segment_addr = sh->slice_addr = 0;
        s->slice_initialized = 0;
    if (!sh->dependent_slice_segment_flag) {
        s->slice_initialized = 0;
        for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
            skip_bits(gb, 1); // slice_reserved_undetermined_flag[]
        sh->slice_type = get_ue_golomb_long(gb);
        if (!(sh->slice_type == I_SLICE ||
              sh->slice_type == P_SLICE ||
              sh->slice_type == B_SLICE)) {
            av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
            return AVERROR_INVALIDDATA;
        if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
            av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
            return AVERROR_INVALIDDATA;
        // when flag is not present, picture is inferred to be output
        sh->pic_output_flag = 1;
        if (s->ps.pps->output_flag_present_flag)
            sh->pic_output_flag = get_bits1(gb);
        if (s->ps.sps->separate_colour_plane_flag)
            sh->colour_plane_id = get_bits(gb, 2);
        sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
        poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
        /* every slice of one picture must carry the same POC */
        if (!sh->first_slice_in_pic_flag && poc != s->poc) {
            av_log(s->avctx, AV_LOG_WARNING,
                   "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
            if (s->avctx->err_recognition & AV_EF_EXPLODE)
                return AVERROR_INVALIDDATA;
        /* short-term RPS: either coded in this header, or selected from
         * the SPS list by index */
        sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
        pos = get_bits_left(gb);
        if (!sh->short_term_ref_pic_set_sps_flag) {
            ret = ff_hevc_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
            sh->short_term_rps = &sh->slice_rps;
            int numbits, rps_idx;
            if (!s->ps.sps->nb_st_rps) {
                av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
                return AVERROR_INVALIDDATA;
            numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
            rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
            sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
        /* sizes are recorded so hwaccels can locate the RPS in the bitstream */
        sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
        pos = get_bits_left(gb);
        ret = decode_lt_rps(s, &sh->long_term_rps, gb);
            av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
            if (s->avctx->err_recognition & AV_EF_EXPLODE)
                return AVERROR_INVALIDDATA;
        sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
        if (s->ps.sps->sps_temporal_mvp_enabled_flag)
            sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
            sh->slice_temporal_mvp_enabled_flag = 0;
        s->sh.short_term_rps = NULL;
        /* TemporalId == 0 picture that is not a sub-layer non-reference,
         * RADL or RASL picture — NOTE(review): the statement guarded by this
         * condition is not visible in this view; presumably it records the
         * POC of the latest such picture (pocTid0) — confirm upstream */
        if (s->temporal_id == 0 &&
            s->nal_unit_type != NAL_TRAIL_N &&
            s->nal_unit_type != NAL_TSA_N &&
            s->nal_unit_type != NAL_STSA_N &&
            s->nal_unit_type != NAL_RADL_N &&
            s->nal_unit_type != NAL_RADL_R &&
            s->nal_unit_type != NAL_RASL_N &&
            s->nal_unit_type != NAL_RASL_R)
        if (s->ps.sps->sao_enabled) {
            sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
            /* one flag covers both chroma planes */
            if (s->ps.sps->chroma_format_idc) {
                sh->slice_sample_adaptive_offset_flag[1] =
                sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
            sh->slice_sample_adaptive_offset_flag[0] = 0;
            sh->slice_sample_adaptive_offset_flag[1] = 0;
            sh->slice_sample_adaptive_offset_flag[2] = 0;
        sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
        if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
            /* active ref counts default to the PPS values unless overridden */
            sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
            if (sh->slice_type == B_SLICE)
                sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
            if (get_bits1(gb)) { // num_ref_idx_active_override_flag
                sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
                if (sh->slice_type == B_SLICE)
                    sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
            if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
                av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
                       sh->nb_refs[L0], sh->nb_refs[L1]);
                return AVERROR_INVALIDDATA;
            sh->rpl_modification_flag[0] = 0;
            sh->rpl_modification_flag[1] = 0;
            nb_refs = ff_hevc_frame_nb_refs(s);
                av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
                return AVERROR_INVALIDDATA;
            /* reference picture list modification (reordering) */
            if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
                sh->rpl_modification_flag[0] = get_bits1(gb);
                if (sh->rpl_modification_flag[0]) {
                    for (i = 0; i < sh->nb_refs[L0]; i++)
                        sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
                if (sh->slice_type == B_SLICE) {
                    sh->rpl_modification_flag[1] = get_bits1(gb);
                    if (sh->rpl_modification_flag[1] == 1)
                        for (i = 0; i < sh->nb_refs[L1]; i++)
                            sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
            if (sh->slice_type == B_SLICE)
                sh->mvd_l1_zero_flag = get_bits1(gb);
            if (s->ps.pps->cabac_init_present_flag)
                sh->cabac_init_flag = get_bits1(gb);
                sh->cabac_init_flag = 0;
            /* collocated picture for temporal MV prediction */
            sh->collocated_ref_idx = 0;
            if (sh->slice_temporal_mvp_enabled_flag) {
                sh->collocated_list = L0;
                if (sh->slice_type == B_SLICE)
                    /* flag is 1 for list 0, hence the negation */
                    sh->collocated_list = !get_bits1(gb);
                if (sh->nb_refs[sh->collocated_list] > 1) {
                    sh->collocated_ref_idx = get_ue_golomb_long(gb);
                    if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
                        av_log(s->avctx, AV_LOG_ERROR,
                               "Invalid collocated_ref_idx: %d.\n",
                               sh->collocated_ref_idx);
                        return AVERROR_INVALIDDATA;
            if ((s->ps.pps->weighted_pred_flag && sh->slice_type == P_SLICE) ||
                (s->ps.pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
                pred_weight_table(s, gb);
            /* coded as 5 - MaxNumMergeCand */
            sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
            if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
                av_log(s->avctx, AV_LOG_ERROR,
                       "Invalid number of merging MVP candidates: %d.\n",
                       sh->max_num_merge_cand);
                return AVERROR_INVALIDDATA;
        sh->slice_qp_delta = get_se_golomb(gb);
        if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
            sh->slice_cb_qp_offset = get_se_golomb(gb);
            sh->slice_cr_qp_offset = get_se_golomb(gb);
            sh->slice_cb_qp_offset = 0;
            sh->slice_cr_qp_offset = 0;
        if (s->ps.pps->chroma_qp_offset_list_enabled_flag)
            sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
            sh->cu_chroma_qp_offset_enabled_flag = 0;
        if (s->ps.pps->deblocking_filter_control_present_flag) {
            int deblocking_filter_override_flag = 0;
            if (s->ps.pps->deblocking_filter_override_enabled_flag)
                deblocking_filter_override_flag = get_bits1(gb);
            if (deblocking_filter_override_flag) {
                sh->disable_deblocking_filter_flag = get_bits1(gb);
                if (!sh->disable_deblocking_filter_flag) {
                    /* offsets are transmitted divided by two */
                    sh->beta_offset = get_se_golomb(gb) * 2;
                    sh->tc_offset = get_se_golomb(gb) * 2;
                /* fall back to the PPS deblocking parameters */
                sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
                sh->beta_offset = s->ps.pps->beta_offset;
                sh->tc_offset = s->ps.pps->tc_offset;
            sh->disable_deblocking_filter_flag = 0;
        /* only coded when at least one in-loop filter crosses slices */
        if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
            (sh->slice_sample_adaptive_offset_flag[0] ||
             sh->slice_sample_adaptive_offset_flag[1] ||
             !sh->disable_deblocking_filter_flag)) {
            sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
            sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
    } else if (!s->slice_initialized) {
        /* a dependent slice segment cannot come before any independent one */
        av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
        return AVERROR_INVALIDDATA;
    sh->num_entry_point_offsets = 0;
    if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
        unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
        // It would be possible to bound this tighter but this here is simpler
        if (num_entry_point_offsets > get_bits_left(gb)) {
            av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
            return AVERROR_INVALIDDATA;
        sh->num_entry_point_offsets = num_entry_point_offsets;
        if (sh->num_entry_point_offsets > 0) {
            int offset_len = get_ue_golomb_long(gb) + 1;
            if (offset_len < 1 || offset_len > 32) {
                sh->num_entry_point_offsets = 0;
                av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
                return AVERROR_INVALIDDATA;
            av_freep(&sh->entry_point_offset);
            av_freep(&sh->offset);
            sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
            sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
            sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
            if (!sh->entry_point_offset || !sh->offset || !sh->size) {
                sh->num_entry_point_offsets = 0;
                av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
                return AVERROR(ENOMEM);
            for (i = 0; i < sh->num_entry_point_offsets; i++) {
                unsigned val = get_bits_long(gb, offset_len);
                sh->entry_point_offset[i] = val + 1; // +1 to get the size
            if (s->threads_number > 1 && (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1)) {
                s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
                s->threads_number = 1;
                s->enable_parallel_tiles = 0;
            s->enable_parallel_tiles = 0;
    if (s->ps.pps->slice_header_extension_present_flag) {
        unsigned int length = get_ue_golomb_long(gb);
        /* 64-bit multiply so length * 8 cannot overflow the comparison */
        if (length*8LL > get_bits_left(gb)) {
            av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
            return AVERROR_INVALIDDATA;
        for (i = 0; i < length; i++)
            skip_bits(gb, 8); // slice_header_extension_data_byte
    // Inferred parameters
    sh->slice_qp = 26U + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
    if (sh->slice_qp > 51 ||
        sh->slice_qp < -s->ps.sps->qp_bd_offset) {
        av_log(s->avctx, AV_LOG_ERROR,
               "The slice_qp %d is outside the valid range "
               -s->ps.sps->qp_bd_offset);
        return AVERROR_INVALIDDATA;
    sh->slice_ctb_addr_rs = sh->slice_segment_addr;
    if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
        av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
        return AVERROR_INVALIDDATA;
    if (get_bits_left(gb) < 0) {
        av_log(s->avctx, AV_LOG_ERROR,
               "Overread slice header by %d bits\n", -get_bits_left(gb));
        return AVERROR_INVALIDDATA;
    s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
    if (!s->ps.pps->cu_qp_delta_enabled_flag)
        s->HEVClc->qp_y = s->sh.slice_qp;
    s->slice_initialized = 1;
    s->HEVClc->tu.cu_qp_offset_cb = 0;
    s->HEVClc->tu.cu_qp_offset_cr = 0;
    /* RASL pictures following this one must be dropped after IDR/BLA or a
     * CRA that immediately follows an end-of-sequence */
    s->no_rasl_output_flag = IS_IDR(s) || IS_BLA(s) || (s->nal_unit_type == NAL_CRA_NUT && s->last_eos);
/* Address one CTB's entry in a per-CTB table laid out in raster order. */
#define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])
/* Assign one SAO field of the current CTB: decode it fresh when neither
 * merge flag is set, otherwise inherit it from the left or above CTB. */
#define SET_SAO(elem, value) \
    if (!sao_merge_up_flag && !sao_merge_left_flag) \
    else if (sao_merge_left_flag) \
        sao->elem = CTB(s->sao, rx-1, ry).elem; \
    else if (sao_merge_up_flag) \
        sao->elem = CTB(s->sao, rx, ry-1).elem; \
/* Decode the SAO parameters for the CTB at (rx, ry), honoring the merge
 * flags, and derive the final signed, scaled offset values. */
static void hls_sao_param(HEVCContext *s, int rx, int ry)
    HEVCLocalContext *lc = s->HEVClc;
    int sao_merge_left_flag = 0;
    int sao_merge_up_flag = 0;
    SAOParams *sao = &CTB(s->sao, rx, ry);
    if (s->sh.slice_sample_adaptive_offset_flag[0] ||
        s->sh.slice_sample_adaptive_offset_flag[1]) {
        if (lc->ctb_left_flag)
            sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
        if (ry > 0 && !sao_merge_left_flag) {
            sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
    for (c_idx = 0; c_idx < (s->ps.sps->chroma_format_idc ? 3 : 1); c_idx++) {
        int log2_sao_offset_scale = c_idx == 0 ? s->ps.pps->log2_sao_offset_scale_luma :
                                                 s->ps.pps->log2_sao_offset_scale_chroma;
        if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
            sao->type_idx[c_idx] = SAO_NOT_APPLIED;
            /* Cr (c_idx == 2) reuses the type and EO class decoded for Cb */
            sao->type_idx[2] = sao->type_idx[1];
            sao->eo_class[2] = sao->eo_class[1];
        SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
        if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
        for (i = 0; i < 4; i++)
            SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
        if (sao->type_idx[c_idx] == SAO_BAND) {
            /* band offsets carry explicit signs; zero offsets carry none */
            for (i = 0; i < 4; i++) {
                if (sao->offset_abs[c_idx][i]) {
                    SET_SAO(offset_sign[c_idx][i],
                            ff_hevc_sao_offset_sign_decode(s));
                    sao->offset_sign[c_idx][i] = 0;
            SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
        } else if (c_idx != 2) {
            SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
        // Inferred parameters
        sao->offset_val[c_idx][0] = 0;
        for (i = 0; i < 4; i++) {
            sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
            if (sao->type_idx[c_idx] == SAO_EDGE) {
                sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
            } else if (sao->offset_sign[c_idx][i]) {
                sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
            /* apply the Range-extension offset scaling */
            sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale;
/* Decode the cross-component prediction scale (Range extension) for chroma
 * component idx into lc->tu.res_scale_val; 0 disables the prediction. */
static int hls_cross_component_pred(HEVCContext *s, int idx) {
    HEVCLocalContext *lc = s->HEVClc;
    int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
    if (log2_res_scale_abs_plus1 != 0) {
        int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
        /* res_scale_val = +/- 2^(log2_res_scale_abs_plus1 - 1) */
        lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
                               (1 - 2 * res_scale_sign_flag);
        lc->tu.res_scale_val = 0;
921 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
922 int xBase, int yBase, int cb_xBase, int cb_yBase,
923 int log2_cb_size, int log2_trafo_size,
924 int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
926 HEVCLocalContext *lc = s->HEVClc;
927 const int log2_trafo_size_c = log2_trafo_size - s->ps.sps->hshift[1];
930 if (lc->cu.pred_mode == MODE_INTRA) {
931 int trafo_size = 1 << log2_trafo_size;
932 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
934 s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
937 if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
938 (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
939 int scan_idx = SCAN_DIAG;
940 int scan_idx_c = SCAN_DIAG;
941 int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
942 (s->ps.sps->chroma_format_idc == 2 &&
943 (cbf_cb[1] || cbf_cr[1]));
945 if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
946 lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
947 if (lc->tu.cu_qp_delta != 0)
948 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
949 lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
950 lc->tu.is_cu_qp_delta_coded = 1;
952 if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset / 2) ||
953 lc->tu.cu_qp_delta > (25 + s->ps.sps->qp_bd_offset / 2)) {
954 av_log(s->avctx, AV_LOG_ERROR,
955 "The cu_qp_delta %d is outside the valid range "
958 -(26 + s->ps.sps->qp_bd_offset / 2),
959 (25 + s->ps.sps->qp_bd_offset / 2));
960 return AVERROR_INVALIDDATA;
963 ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
966 if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
967 !lc->cu.cu_transquant_bypass_flag && !lc->tu.is_cu_chroma_qp_offset_coded) {
968 int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
969 if (cu_chroma_qp_offset_flag) {
970 int cu_chroma_qp_offset_idx = 0;
971 if (s->ps.pps->chroma_qp_offset_list_len_minus1 > 0) {
972 cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
973 av_log(s->avctx, AV_LOG_ERROR,
974 "cu_chroma_qp_offset_idx not yet tested.\n");
976 lc->tu.cu_qp_offset_cb = s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
977 lc->tu.cu_qp_offset_cr = s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
979 lc->tu.cu_qp_offset_cb = 0;
980 lc->tu.cu_qp_offset_cr = 0;
982 lc->tu.is_cu_chroma_qp_offset_coded = 1;
985 if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
986 if (lc->tu.intra_pred_mode >= 6 &&
987 lc->tu.intra_pred_mode <= 14) {
988 scan_idx = SCAN_VERT;
989 } else if (lc->tu.intra_pred_mode >= 22 &&
990 lc->tu.intra_pred_mode <= 30) {
991 scan_idx = SCAN_HORIZ;
994 if (lc->tu.intra_pred_mode_c >= 6 &&
995 lc->tu.intra_pred_mode_c <= 14) {
996 scan_idx_c = SCAN_VERT;
997 } else if (lc->tu.intra_pred_mode_c >= 22 &&
998 lc->tu.intra_pred_mode_c <= 30) {
999 scan_idx_c = SCAN_HORIZ;
1003 lc->tu.cross_pf = 0;
1006 ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
1007 if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1008 int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1009 int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1010 lc->tu.cross_pf = (s->ps.pps->cross_component_prediction_enabled_flag && cbf_luma &&
1011 (lc->cu.pred_mode == MODE_INTER ||
1012 (lc->tu.chroma_mode_c == 4)));
1014 if (lc->tu.cross_pf) {
1015 hls_cross_component_pred(s, 0);
1017 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1018 if (lc->cu.pred_mode == MODE_INTRA) {
1019 ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1020 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
1023 ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1024 log2_trafo_size_c, scan_idx_c, 1);
1026 if (lc->tu.cross_pf) {
1027 ptrdiff_t stride = s->frame->linesize[1];
1028 int hshift = s->ps.sps->hshift[1];
1029 int vshift = s->ps.sps->vshift[1];
1030 int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1031 int16_t *coeffs = (int16_t*)lc->edge_emu_buffer2;
1032 int size = 1 << log2_trafo_size_c;
1034 uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1035 ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1036 for (i = 0; i < (size * size); i++) {
1037 coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1039 s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1043 if (lc->tu.cross_pf) {
1044 hls_cross_component_pred(s, 1);
1046 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1047 if (lc->cu.pred_mode == MODE_INTRA) {
1048 ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1049 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1052 ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1053 log2_trafo_size_c, scan_idx_c, 2);
1055 if (lc->tu.cross_pf) {
1056 ptrdiff_t stride = s->frame->linesize[2];
1057 int hshift = s->ps.sps->hshift[2];
1058 int vshift = s->ps.sps->vshift[2];
1059 int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1060 int16_t *coeffs = (int16_t*)lc->edge_emu_buffer2;
1061 int size = 1 << log2_trafo_size_c;
1063 uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1064 ((x0 >> hshift) << s->ps.sps->pixel_shift)];
1065 for (i = 0; i < (size * size); i++) {
1066 coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1068 s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1071 } else if (s->ps.sps->chroma_format_idc && blk_idx == 3) {
1072 int trafo_size_h = 1 << (log2_trafo_size + 1);
1073 int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1074 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1075 if (lc->cu.pred_mode == MODE_INTRA) {
1076 ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1077 trafo_size_h, trafo_size_v);
1078 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1081 ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1082 log2_trafo_size, scan_idx_c, 1);
1084 for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1085 if (lc->cu.pred_mode == MODE_INTRA) {
1086 ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1087 trafo_size_h, trafo_size_v);
1088 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1091 ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1092 log2_trafo_size, scan_idx_c, 2);
1095 } else if (s->ps.sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
1096 if (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3) {
1097 int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]);
1098 int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]);
1099 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1100 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1101 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1102 if (s->ps.sps->chroma_format_idc == 2) {
1103 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1104 trafo_size_h, trafo_size_v);
1105 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1106 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1108 } else if (blk_idx == 3) {
1109 int trafo_size_h = 1 << (log2_trafo_size + 1);
1110 int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]);
1111 ff_hevc_set_neighbour_available(s, xBase, yBase,
1112 trafo_size_h, trafo_size_v);
1113 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1114 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1115 if (s->ps.sps->chroma_format_idc == 2) {
1116 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1117 trafo_size_h, trafo_size_v);
1118 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1119 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
/* Mark every minimum-size PU covered by a transquant-bypass CU in s->is_pcm
 * (value 2) so the in-loop deblocking filter later leaves those samples
 * untouched; the iteration range is clipped to the picture dimensions.
 * NOTE(review): this extraction is missing some original lines (braces,
 * loop-variable declarations); comments describe the visible logic. */
1127 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1129 int cb_size = 1 << log2_cb_size;
1130 int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
1132 int min_pu_width = s->ps.sps->min_pu_width;
/* clip to picture size so border CUs do not index past the PU map */
1133 int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
1134 int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
1137 for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1138 for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1139 s->is_pcm[i + j * min_pu_width] = 2;
/* Recursively parse one transform tree (spec 7.3.8.8, transform_tree()).
 * Inherits chroma CBFs from the parent via base_cbf_cb/base_cbf_cr, decodes
 * split_transform_flag (explicit or inferred), recurses into four quadrants
 * via SUBDIVIDE when split, otherwise decodes cbf_luma and hands the leaf to
 * hls_transform_unit(), then records per-TU cbf_luma and deblocking strength.
 * NOTE(review): this extraction is missing some original lines
 * (declarations of cbf_cb/cbf_cr/ret, else branches, braces). */
1142 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1143 int xBase, int yBase, int cb_xBase, int cb_yBase,
1144 int log2_cb_size, int log2_trafo_size,
1145 int trafo_depth, int blk_idx,
1146 const int *base_cbf_cb, const int *base_cbf_cr)
1148 HEVCLocalContext *lc = s->HEVClc;
1149 uint8_t split_transform_flag;
/* local copies of the parent's chroma coded-block flags; index 1 is the
 * second chroma field used for 4:2:2 (chroma_format_idc == 2) */
1154 cbf_cb[0] = base_cbf_cb[0];
1155 cbf_cb[1] = base_cbf_cb[1];
1156 cbf_cr[0] = base_cbf_cr[0];
1157 cbf_cr[1] = base_cbf_cr[1];
/* for intra NxN CUs the per-blk intra modes only become valid at depth 1 */
1159 if (lc->cu.intra_split_flag) {
1160 if (trafo_depth == 1) {
1161 lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1162 if (s->ps.sps->chroma_format_idc == 3) {
1163 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1164 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[blk_idx];
1166 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1167 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0];
1171 lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[0];
1172 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1173 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0];
/* split flag is coded only when the TU size can actually shrink further;
 * otherwise it is inferred below */
1176 if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
1177 log2_trafo_size > s->ps.sps->log2_min_tb_size &&
1178 trafo_depth < lc->cu.max_trafo_depth &&
1179 !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1180 split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1182 int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
1183 lc->cu.pred_mode == MODE_INTER &&
1184 lc->cu.part_mode != PART_2Nx2N &&
1187 split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
1188 (lc->cu.intra_split_flag && trafo_depth == 0) ||
/* chroma CBFs: decoded here except for 4:2:0 4x4 TUs, which carry no
 * separately-coded chroma at this level */
1192 if (s->ps.sps->chroma_format_idc && (log2_trafo_size > 2 || s->ps.sps->chroma_format_idc == 3)) {
1193 if (trafo_depth == 0 || cbf_cb[0]) {
1194 cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1195 if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1196 cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1200 if (trafo_depth == 0 || cbf_cr[0]) {
1201 cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1202 if (s->ps.sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1203 cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
/* recurse into the four half-size quadrants */
1208 if (split_transform_flag) {
1209 const int trafo_size_split = 1 << (log2_trafo_size - 1);
1210 const int x1 = x0 + trafo_size_split;
1211 const int y1 = y0 + trafo_size_split;
1213 #define SUBDIVIDE(x, y, idx) \
1215 ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1216 log2_trafo_size - 1, trafo_depth + 1, idx, \
1222 SUBDIVIDE(x0, y0, 0);
1223 SUBDIVIDE(x1, y0, 1);
1224 SUBDIVIDE(x0, y1, 2);
1225 SUBDIVIDE(x1, y1, 3);
1229 int min_tu_size = 1 << s->ps.sps->log2_min_tb_size;
1230 int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
1231 int min_tu_width = s->ps.sps->min_tb_width;
/* leaf TU: cbf_luma is coded unless it can be inferred to 1 (inter root
 * with no chroma CBFs set) */
1234 if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1235 cbf_cb[0] || cbf_cr[0] ||
1236 (s->ps.sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1237 cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1240 ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1241 log2_cb_size, log2_trafo_size,
1242 blk_idx, cbf_luma, cbf_cb, cbf_cr);
1245 // TODO: store cbf_luma somewhere else
/* record cbf_luma per min-TU for the deblocking filter */
1248 for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1249 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1250 int x_tu = (x0 + j) >> log2_min_tu_size;
1251 int y_tu = (y0 + i) >> log2_min_tu_size;
1252 s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1255 if (!s->sh.disable_deblocking_filter_flag) {
1256 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1257 if (s->ps.pps->transquant_bypass_enable_flag &&
1258 lc->cu.cu_transquant_bypass_flag)
1259 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
/* Decode one pcm_sample() block (IPCM CU): the raw samples are embedded in
 * the bitstream after CABAC alignment. Computes the bit length of the PCM
 * payload (luma + both chroma planes at their own PCM bit depths), skips
 * those bytes in the CABAC reader, re-reads them via a GetBitContext and
 * writes the samples straight into the frame with hevcdsp.put_pcm().
 * NOTE(review): declarations of gb/ret and the error/return lines are
 * missing from this extraction. */
1265 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1267 HEVCLocalContext *lc = s->HEVClc;
1269 int cb_size = 1 << log2_cb_size;
1270 int stride0 = s->frame->linesize[0];
1271 uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->ps.sps->pixel_shift)];
1272 int stride1 = s->frame->linesize[1];
1273 uint8_t *dst1 = &s->frame->data[1][(y0 >> s->ps.sps->vshift[1]) * stride1 + ((x0 >> s->ps.sps->hshift[1]) << s->ps.sps->pixel_shift)];
1274 int stride2 = s->frame->linesize[2];
1275 uint8_t *dst2 = &s->frame->data[2][(y0 >> s->ps.sps->vshift[2]) * stride2 + ((x0 >> s->ps.sps->hshift[2]) << s->ps.sps->pixel_shift)];
/* total payload size in bits: luma block plus both subsampled chroma blocks */
1277 int length = cb_size * cb_size * s->ps.sps->pcm.bit_depth +
1278 (((cb_size >> s->ps.sps->hshift[1]) * (cb_size >> s->ps.sps->vshift[1])) +
1279 ((cb_size >> s->ps.sps->hshift[2]) * (cb_size >> s->ps.sps->vshift[2]))) *
1280 s->ps.sps->pcm.bit_depth_chroma;
/* advance the CABAC reader past the byte-aligned PCM payload */
1281 const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1284 if (!s->sh.disable_deblocking_filter_flag)
1285 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1287 ret = init_get_bits(&gb, pcm, length);
1291 s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size, &gb, s->ps.sps->pcm.bit_depth);
1292 if (s->ps.sps->chroma_format_idc) {
1293 s->hevcdsp.put_pcm(dst1, stride1,
1294 cb_size >> s->ps.sps->hshift[1],
1295 cb_size >> s->ps.sps->vshift[1],
1296 &gb, s->ps.sps->pcm.bit_depth_chroma);
1297 s->hevcdsp.put_pcm(dst2, stride2,
1298 cb_size >> s->ps.sps->hshift[2],
1299 cb_size >> s->ps.sps->vshift[2],
1300 &gb, s->ps.sps->pcm.bit_depth_chroma);
1307 * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1309 * @param s HEVC decoding context
1310 * @param dst target buffer for block data at block position
1311 * @param dststride stride of the dst buffer
1312 * @param ref reference picture buffer at origin (0, 0)
1313 * @param mv motion vector (relative to block position) to get pixel data from
1314 * @param x_off horizontal position of block from origin (0, 0)
1315 * @param y_off vertical position of block from origin (0, 0)
1316 * @param block_w width of block
1317 * @param block_h height of block
1318 * @param luma_weight weighting factor applied to the luma prediction
1319 * @param luma_offset additive offset applied to the luma prediction value
/* Unidirectional luma motion compensation: applies the full-pel part of the
 * mv to (x_off, y_off), uses emulated_edge_mc when the QPEL read window
 * (block + QPEL_EXTRA margin) would leave the picture, then dispatches to
 * the plain or explicitly-weighted qpel function.
 * NOTE(review): declarations of mx/my (fractional mv parts) and the
 * weight_flag branch line are missing from this extraction. */
1322 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1323 AVFrame *ref, const Mv *mv, int x_off, int y_off,
1324 int block_w, int block_h, int luma_weight, int luma_offset)
1326 HEVCLocalContext *lc = s->HEVClc;
1327 uint8_t *src = ref->data[0];
1328 ptrdiff_t srcstride = ref->linesize[0];
1329 int pic_width = s->ps.sps->width;
1330 int pic_height = s->ps.sps->height;
/* explicit weighted prediction is signalled per slice type in the PPS */
1333 int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1334 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1335 int idx = ff_hevc_pel_weight[block_w];
/* full-pel motion: mv components are in quarter-pel units */
1337 x_off += mv->x >> 2;
1338 y_off += mv->y >> 2;
1339 src += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
/* NOTE(review): the y guard uses QPEL_EXTRA_AFTER rather than _BEFORE —
 * appears intentionally conservative; matches luma_mc_bi below */
1341 if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1342 x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1343 y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1344 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1345 int offset = QPEL_EXTRA_BEFORE * srcstride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1346 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1348 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1349 edge_emu_stride, srcstride,
1350 block_w + QPEL_EXTRA,
1351 block_h + QPEL_EXTRA,
1352 x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1353 pic_width, pic_height);
1354 src = lc->edge_emu_buffer + buf_offset;
1355 srcstride = edge_emu_stride;
1359 s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1360 block_h, mx, my, block_w);
1362 s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1363 block_h, s->sh.luma_log2_weight_denom,
1364 luma_weight, luma_offset, mx, my, block_w);
1368 * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1370 * @param s HEVC decoding context
1371 * @param dst target buffer for block data at block position
1372 * @param dststride stride of the dst buffer
1373 * @param ref0 reference picture0 buffer at origin (0, 0)
1374 * @param mv0 motion vector0 (relative to block position) to get pixel data from
1375 * @param x_off horizontal position of block from origin (0, 0)
1376 * @param y_off vertical position of block from origin (0, 0)
1377 * @param block_w width of block
1378 * @param block_h height of block
1379 * @param ref1 reference picture1 buffer at origin (0, 0)
1380 * @param mv1 motion vector1 (relative to block position) to get pixel data from
1381 * @param current_mv current motion vector structure
/* Bidirectional luma motion compensation: interpolates the L0 prediction
 * into lc->tmp, then combines it with the L1 prediction via the bi (average)
 * or bi_w (explicitly weighted) qpel function. Each reference gets its own
 * edge emulation pass into a separate scratch buffer
 * (edge_emu_buffer / edge_emu_buffer2) when its read window leaves the
 * picture.
 * NOTE(review): the weight_flag branch lines are missing from this
 * extraction. */
1383 static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1384 AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1385 int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1387 HEVCLocalContext *lc = s->HEVClc;
1388 ptrdiff_t src0stride = ref0->linesize[0];
1389 ptrdiff_t src1stride = ref1->linesize[0];
1390 int pic_width = s->ps.sps->width;
1391 int pic_height = s->ps.sps->height;
/* fractional quarter-pel parts of both motion vectors */
1392 int mx0 = mv0->x & 3;
1393 int my0 = mv0->y & 3;
1394 int mx1 = mv1->x & 3;
1395 int my1 = mv1->y & 3;
1396 int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1397 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
/* full-pel offsets per reference list */
1398 int x_off0 = x_off + (mv0->x >> 2);
1399 int y_off0 = y_off + (mv0->y >> 2);
1400 int x_off1 = x_off + (mv1->x >> 2);
1401 int y_off1 = y_off + (mv1->y >> 2);
1402 int idx = ff_hevc_pel_weight[block_w];
1404 uint8_t *src0 = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1405 uint8_t *src1 = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
/* edge emulation for the L0 read window */
1407 if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1408 x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1409 y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1410 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1411 int offset = QPEL_EXTRA_BEFORE * src0stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1412 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1414 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1415 edge_emu_stride, src0stride,
1416 block_w + QPEL_EXTRA,
1417 block_h + QPEL_EXTRA,
1418 x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1419 pic_width, pic_height);
1420 src0 = lc->edge_emu_buffer + buf_offset;
1421 src0stride = edge_emu_stride;
/* edge emulation for the L1 read window, into the second scratch buffer */
1424 if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1425 x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1426 y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1427 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1428 int offset = QPEL_EXTRA_BEFORE * src1stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1429 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->ps.sps->pixel_shift);
1431 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1432 edge_emu_stride, src1stride,
1433 block_w + QPEL_EXTRA,
1434 block_h + QPEL_EXTRA,
1435 x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1436 pic_width, pic_height);
1437 src1 = lc->edge_emu_buffer2 + buf_offset;
1438 src1stride = edge_emu_stride;
/* L0 intermediate prediction into lc->tmp, then combine with L1 */
1441 s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
1442 block_h, mx0, my0, block_w);
1444 s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1445 block_h, mx1, my1, block_w);
1447 s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1448 block_h, s->sh.luma_log2_weight_denom,
1449 s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1450 s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1451 s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1452 s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1458 * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1460 * @param s HEVC decoding context
 1461  * @param dst0 target buffer for block data at block position (one chroma plane)
 1462  * @param dststride stride of the dst0 buffer
 1463  * @param src0 source chroma plane buffer at origin (0, 0)
1464 * @param ref reference picture buffer at origin (0, 0)
1465 * @param mv motion vector (relative to block position) to get pixel data from
1466 * @param x_off horizontal position of block from origin (0, 0)
1467 * @param y_off vertical position of block from origin (0, 0)
1468 * @param block_w width of block
1469 * @param block_h height of block
1470 * @param chroma_weight weighting factor applied to the chroma prediction
1471 * @param chroma_offset additive offset applied to the chroma prediction value
/* Unidirectional chroma motion compensation for a single chroma plane
 * (spec 8.5.3.2.2.2). reflist selects which list of current_mv supplies the
 * motion vector; the fractional mv part is taken modulo the chroma
 * subsampling, edge emulation is used when the EPEL read window leaves the
 * picture, and the plain or explicitly-weighted epel function is dispatched
 * on weight_flag.
 * FIX: restored "&current_mv" — the source had been corrupted to the
 * mojibake "¤t_mv" (double HTML-entity decode of "&curren"), which is
 * not valid C. */
1474 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1475 ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1476 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1478 HEVCLocalContext *lc = s->HEVClc;
1479 int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1];
1480 int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1];
1481 const Mv *mv = &current_mv->mv[reflist];
1482 int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1483 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1484 int idx = ff_hevc_pel_weight[block_w];
1485 int hshift = s->ps.sps->hshift[1];
1486 int vshift = s->ps.sps->vshift[1];
/* fractional mv part in chroma units, scaled up for 4:2:0/4:2:2 */
1487 intptr_t mx = av_mod_uintp2(mv->x, 2 + hshift);
1488 intptr_t my = av_mod_uintp2(mv->y, 2 + vshift);
1489 intptr_t _mx = mx << (1 - hshift);
1490 intptr_t _my = my << (1 - vshift);
/* full-pel motion in chroma sample units */
1492 x_off += mv->x >> (2 + hshift);
1493 y_off += mv->y >> (2 + vshift);
1494 src0 += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift));
1496 if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1497 x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1498 y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1499 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1500 int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->ps.sps->pixel_shift));
1501 int buf_offset0 = EPEL_EXTRA_BEFORE *
1502 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1503 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1504 edge_emu_stride, srcstride,
1505 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1506 x_off - EPEL_EXTRA_BEFORE,
1507 y_off - EPEL_EXTRA_BEFORE,
1508 pic_width, pic_height);
1510 src0 = lc->edge_emu_buffer + buf_offset0;
1511 srcstride = edge_emu_stride;
1514 s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1515 block_h, _mx, _my, block_w);
1517 s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1518 block_h, s->sh.chroma_log2_weight_denom,
1519 chroma_weight, chroma_offset, _mx, _my, block_w);
1523 * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1525 * @param s HEVC decoding context
1526 * @param dst target buffer for block data at block position
1527 * @param dststride stride of the dst buffer
1528 * @param ref0 reference picture0 buffer at origin (0, 0)
1529 * @param mv0 motion vector0 (relative to block position) to get pixel data from
1530 * @param x_off horizontal position of block from origin (0, 0)
1531 * @param y_off vertical position of block from origin (0, 0)
1532 * @param block_w width of block
1533 * @param block_h height of block
1534 * @param ref1 reference picture1 buffer at origin (0, 0)
1535 * @param mv1 motion vector1 (relative to block position) to get pixel data from
1536 * @param current_mv current motion vector structure
1537 * @param cidx chroma component(cb, cr)
/* Bidirectional chroma motion compensation for one component (cidx 0 = Cb,
 * 1 = Cr): interpolates the L0 prediction into lc->tmp, then combines it
 * with the L1 prediction via the bi (average) or bi_w (explicitly weighted)
 * epel function. Each reference gets its own edge emulation pass into a
 * separate scratch buffer when its read window leaves the picture.
 * FIX: restored "&current_mv" on the mv0/mv1 initializers — the source had
 * been corrupted to the mojibake "¤t_mv" (double HTML-entity decode of
 * "&curren"), which is not valid C. */
1539 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1540 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1542 HEVCLocalContext *lc = s->HEVClc;
1543 uint8_t *src1 = ref0->data[cidx+1];
1544 uint8_t *src2 = ref1->data[cidx+1];
1545 ptrdiff_t src1stride = ref0->linesize[cidx+1];
1546 ptrdiff_t src2stride = ref1->linesize[cidx+1];
1547 int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) ||
1548 (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag);
1549 int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1];
1550 int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1];
1551 Mv *mv0 = &current_mv->mv[0];
1552 Mv *mv1 = &current_mv->mv[1];
1553 int hshift = s->ps.sps->hshift[1];
1554 int vshift = s->ps.sps->vshift[1];
/* fractional mv parts in chroma units for both lists */
1556 intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift);
1557 intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift);
1558 intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift);
1559 intptr_t my1 = av_mod_uintp2(mv1->y, 2 + vshift);
1560 intptr_t _mx0 = mx0 << (1 - hshift);
1561 intptr_t _my0 = my0 << (1 - vshift);
1562 intptr_t _mx1 = mx1 << (1 - hshift);
1563 intptr_t _my1 = my1 << (1 - vshift);
/* full-pel offsets per reference list, in chroma sample units */
1565 int x_off0 = x_off + (mv0->x >> (2 + hshift));
1566 int y_off0 = y_off + (mv0->y >> (2 + vshift));
1567 int x_off1 = x_off + (mv1->x >> (2 + hshift));
1568 int y_off1 = y_off + (mv1->y >> (2 + vshift));
1569 int idx = ff_hevc_pel_weight[block_w];
1570 src1 += y_off0 * src1stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift);
1571 src2 += y_off1 * src2stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift);
/* edge emulation for the L0 read window */
1573 if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1574 x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1575 y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1576 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1577 int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->ps.sps->pixel_shift));
1578 int buf_offset1 = EPEL_EXTRA_BEFORE *
1579 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1581 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1582 edge_emu_stride, src1stride,
1583 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1584 x_off0 - EPEL_EXTRA_BEFORE,
1585 y_off0 - EPEL_EXTRA_BEFORE,
1586 pic_width, pic_height);
1588 src1 = lc->edge_emu_buffer + buf_offset1;
1589 src1stride = edge_emu_stride;
/* edge emulation for the L1 read window, into the second scratch buffer */
1592 if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1593 x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1594 y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1595 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->ps.sps->pixel_shift;
1596 int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->ps.sps->pixel_shift));
1597 int buf_offset1 = EPEL_EXTRA_BEFORE *
1598 (edge_emu_stride + (1 << s->ps.sps->pixel_shift));
1600 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1601 edge_emu_stride, src2stride,
1602 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1603 x_off1 - EPEL_EXTRA_BEFORE,
1604 y_off1 - EPEL_EXTRA_BEFORE,
1605 pic_width, pic_height);
1607 src2 = lc->edge_emu_buffer2 + buf_offset1;
1608 src2stride = edge_emu_stride;
/* L0 intermediate prediction into lc->tmp, then combine with L1 */
1611 s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1612 block_h, _mx0, _my0, block_w);
1614 s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1615 src2, src2stride, lc->tmp,
1616 block_h, _mx1, _my1, block_w);
1618 s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1619 src2, src2stride, lc->tmp,
1621 s->sh.chroma_log2_weight_denom,
1622 s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1623 s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1624 s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1625 s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1626 _mx1, _my1, block_w);
/* Frame-threading synchronization: block until the reference frame has been
 * decoded down to row y — the bottom of the predicted block shifted by the
 * full-pel vertical mv plus a 9-row interpolation/deblocking margin,
 * clamped at 0. No-op for non-frame-threaded decoding. */
1629 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1630 const Mv *mv, int y0, int height)
1632 int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1634 if (s->threads_type == FF_THREAD_FRAME )
1635 ff_thread_await_progress(&ref->tf, y, 0);
/* Decode the AMVP (non-merge) motion data for one PU: inter_pred_idc, per
 * list the reference index, mvd, and mvp flag, then resolve the motion
 * vector predictor via ff_hevc_luma_mv_mvp_mode() and add the decoded mvd.
 * Handles L0, L1 or BI; for BI with mvd_l1_zero_flag the L1 mvd is forced
 * to zero instead of being parsed.
 * NOTE(review): the declaration of mvp_flag is missing from this
 * extraction. */
1638 static void hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
1639 int nPbH, int log2_cb_size, int part_idx,
1640 int merge_idx, MvField *mv)
1642 HEVCLocalContext *lc = s->HEVClc;
1643 enum InterPredIdc inter_pred_idc = PRED_L0;
1646 ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
/* inter_pred_idc is only coded in B slices; P slices are always L0 */
1648 if (s->sh.slice_type == B_SLICE)
1649 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
/* list 0 motion data */
1651 if (inter_pred_idc != PRED_L1) {
1652 if (s->sh.nb_refs[L0])
1653 mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1655 mv->pred_flag = PF_L0;
1656 ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1657 mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1658 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1659 part_idx, merge_idx, mv, mvp_flag, 0);
1660 mv->mv[0].x += lc->pu.mvd.x;
1661 mv->mv[0].y += lc->pu.mvd.y;
/* list 1 motion data */
1664 if (inter_pred_idc != PRED_L0) {
1665 if (s->sh.nb_refs[L1])
1666 mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
/* mvd_l1_zero_flag: for BI prediction the L1 mvd is inferred as zero */
1668 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1669 AV_ZERO32(&lc->pu.mvd);
1671 ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1674 mv->pred_flag += PF_L1;
1675 mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1676 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1677 part_idx, merge_idx, mv, mvp_flag, 1);
1678 mv->mv[1].x += lc->pu.mvd.x;
1679 mv->mv[1].y += lc->pu.mvd.y;
/* Decode and reconstruct one inter prediction unit: parse merge/AMVP motion
 * data into current_mv, replicate it into the min-PU motion field
 * (tab_mvf), wait for the referenced rows under frame threading, then run
 * luma and chroma motion compensation for L0-only, L1-only or BI
 * prediction.
 * FIX: restored every "&current_mv" — the source had been corrupted to the
 * mojibake "¤t_mv" (double HTML-entity decode of "&curren"), which is
 * not valid C. No other token changed.
 * NOTE(review): this extraction is missing some original lines (the
 * nPbW/nPbH parameters line, merge_idx/x_pu/y_pu/i/j declarations,
 * braces). */
1683 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1685 int log2_cb_size, int partIdx, int idx)
/* address of sample (x, y) in plane c_idx, honoring chroma subsampling */
1687 #define POS(c_idx, x, y) \
1688 &s->frame->data[c_idx][((y) >> s->ps.sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1689 (((x) >> s->ps.sps->hshift[c_idx]) << s->ps.sps->pixel_shift)]
1690 HEVCLocalContext *lc = s->HEVClc;
1692 struct MvField current_mv = {{{ 0 }}};
1694 int min_pu_width = s->ps.sps->min_pu_width;
1696 MvField *tab_mvf = s->ref->tab_mvf;
1697 RefPicList *refPicList = s->ref->refPicList;
1698 HEVCFrame *ref0 = NULL, *ref1 = NULL;
1699 uint8_t *dst0 = POS(0, x0, y0);
1700 uint8_t *dst1 = POS(1, x0, y0);
1701 uint8_t *dst2 = POS(2, x0, y0);
1702 int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
1703 int min_cb_width = s->ps.sps->min_cb_width;
1704 int x_cb = x0 >> log2_min_cb_size;
1705 int y_cb = y0 >> log2_min_cb_size;
1709 int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1712 lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
/* merge mode (or skip, which implies merge): motion comes from a candidate */
1714 if (skip_flag || lc->pu.merge_flag) {
1715 if (s->sh.max_num_merge_cand > 1)
1716 merge_idx = ff_hevc_merge_idx_decode(s);
1720 ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1721 partIdx, merge_idx, &current_mv);
1723 hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1724 partIdx, merge_idx, &current_mv);
/* replicate the PU's motion into every covered min-PU entry */
1727 x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1728 y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1730 for (j = 0; j < nPbH >> s->ps.sps->log2_min_pu_size; j++)
1731 for (i = 0; i < nPbW >> s->ps.sps->log2_min_pu_size; i++)
1732 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
/* resolve references and, under frame threading, wait for the source rows */
1734 if (current_mv.pred_flag & PF_L0) {
1735 ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1738 hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1740 if (current_mv.pred_flag & PF_L1) {
1741 ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1744 hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
/* L0-only unidirectional prediction */
1747 if (current_mv.pred_flag == PF_L0) {
1748 int x0_c = x0 >> s->ps.sps->hshift[1];
1749 int y0_c = y0 >> s->ps.sps->vshift[1];
1750 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1751 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1753 luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1754 &current_mv.mv[0], x0, y0, nPbW, nPbH,
1755 s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1756 s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1758 if (s->ps.sps->chroma_format_idc) {
1759 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1760 0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1761 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1762 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1763 0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1764 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
/* L1-only unidirectional prediction */
1766 } else if (current_mv.pred_flag == PF_L1) {
1767 int x0_c = x0 >> s->ps.sps->hshift[1];
1768 int y0_c = y0 >> s->ps.sps->vshift[1];
1769 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1770 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1772 luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1773 &current_mv.mv[1], x0, y0, nPbW, nPbH,
1774 s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1775 s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1777 if (s->ps.sps->chroma_format_idc) {
1778 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1779 1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1780 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1782 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1783 1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1784 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
/* bidirectional prediction */
1786 } else if (current_mv.pred_flag == PF_BI) {
1787 int x0_c = x0 >> s->ps.sps->hshift[1];
1788 int y0_c = y0 >> s->ps.sps->vshift[1];
1789 int nPbW_c = nPbW >> s->ps.sps->hshift[1];
1790 int nPbH_c = nPbH >> s->ps.sps->vshift[1];
1792 luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1793 &current_mv.mv[0], x0, y0, nPbW, nPbH,
1794 ref1->frame, &current_mv.mv[1], &current_mv);
1796 if (s->ps.sps->chroma_format_idc) {
1797 chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1798 x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0);
1800 chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1801 x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 1);
/* Derive the luma intra prediction mode for one PU (spec 8.4.2): build the
 * three most-probable-mode candidates from the left and top neighbours
 * (INTRA_DC when unavailable; the top neighbour is not used across a CTB
 * row boundary), then either pick candidate[mpm_idx] or map
 * rem_intra_luma_pred_mode past the sorted candidates. The result is
 * written into tab_ipm and the covered min-PUs are marked PF_INTRA in the
 * motion field. Returns the derived mode.
 * NOTE(review): the candidate[] declaration and a few branch/brace lines
 * are missing from this extraction. */
1809 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1810 int prev_intra_luma_pred_flag)
1812 HEVCLocalContext *lc = s->HEVClc;
1813 int x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1814 int y_pu = y0 >> s->ps.sps->log2_min_pu_size;
1815 int min_pu_width = s->ps.sps->min_pu_width;
1816 int size_in_pus = pu_size >> s->ps.sps->log2_min_pu_size;
/* position within the current CTB, for neighbour availability checks */
1817 int x0b = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
1818 int y0b = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
1820 int cand_up = (lc->ctb_up_flag || y0b) ?
1821 s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1822 int cand_left = (lc->ctb_left_flag || x0b) ?
1823 s->tab_ipm[y_pu * min_pu_width + x_pu - 1] : INTRA_DC;
1825 int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
1827 MvField *tab_mvf = s->ref->tab_mvf;
1828 int intra_pred_mode;
1832 // intra_pred_mode prediction does not cross vertical CTB boundaries
1833 if ((y0 - 1) < y_ctb)
/* MPM candidate construction per spec 8.4.2 */
1836 if (cand_left == cand_up) {
1837 if (cand_left < 2) {
1838 candidate[0] = INTRA_PLANAR;
1839 candidate[1] = INTRA_DC;
1840 candidate[2] = INTRA_ANGULAR_26;
/* angular neighbours of cand_left, wrapping within the 32 angular modes */
1842 candidate[0] = cand_left;
1843 candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1844 candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1847 candidate[0] = cand_left;
1848 candidate[1] = cand_up;
1849 if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1850 candidate[2] = INTRA_PLANAR;
1851 } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1852 candidate[2] = INTRA_DC;
1854 candidate[2] = INTRA_ANGULAR_26;
1858 if (prev_intra_luma_pred_flag) {
1859 intra_pred_mode = candidate[lc->pu.mpm_idx];
/* rem mode indexes the non-MPM modes: sort candidates, then bump the
 * remainder past each candidate it reaches */
1861 if (candidate[0] > candidate[1])
1862 FFSWAP(uint8_t, candidate[0], candidate[1]);
1863 if (candidate[0] > candidate[2])
1864 FFSWAP(uint8_t, candidate[0], candidate[2]);
1865 if (candidate[1] > candidate[2])
1866 FFSWAP(uint8_t, candidate[1], candidate[2]);
1868 intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1869 for (i = 0; i < 3; i++)
1870 if (intra_pred_mode >= candidate[i])
1874 /* write the intra prediction units into the mv array */
1877 for (i = 0; i < size_in_pus; i++) {
1878 memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1879 intra_pred_mode, size_in_pus);
1881 for (j = 0; j < size_in_pus; j++) {
1882 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
1886 return intra_pred_mode;
/* Record the coding-tree depth for every min-CB covered by the
 * (1 << log2_cb_size) square at (x0, y0) in s->tab_ct_depth.
 * NOTE(review): the tail of this function (memset value/length args,
 * closing brace) is elided in this extract. */
1889 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1890 int log2_cb_size, int ct_depth)
/* number of min-CB units spanned by this CB */
1892 int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
/* top-left position in min-CB units */
1893 int x_cb = x0 >> s->ps.sps->log2_min_cb_size;
1894 int y_cb = y0 >> s->ps.sps->log2_min_cb_size;
/* fill one row of the depth table per iteration */
1897 for (y = 0; y < length; y++)
1898 memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
/* Lookup table used on the chroma_format_idc == 2 (4:2:2) path in
 * intra_prediction_unit() to remap an intra mode index to the chroma
 * prediction mode — presumably the 4:2:2 chroma mode mapping from the
 * H.265 spec; TODO(review) confirm against the standard's table. */
1902 static const uint8_t tab_mode_idx[] = {
1903 0, 1, 2, 2, 2, 2, 3, 5, 7, 8, 10, 12, 13, 15, 17, 18, 19, 20,
1904 21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31};
/* Decode the intra prediction modes of the current CU: the luma mode for
 * each prediction block (1 block, or 4 when part_mode == PART_NxN), then
 * the chroma mode(s) depending on chroma_format_idc (4:4:4 decodes one
 * chroma mode per PB; 4:2:2 remaps via tab_mode_idx; other non-mono
 * formats decode a single chroma mode).
 * NOTE(review): several lines (else branches, closing braces) are elided
 * in this extract. */
1906 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1909 HEVCLocalContext *lc = s->HEVClc;
/* chroma mode candidates: planar, angular-26, angular-10, DC */
1910 static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1911 uint8_t prev_intra_luma_pred_flag[4];
/* PART_NxN splits the CU into a 2x2 grid of prediction blocks */
1912 int split = lc->cu.part_mode == PART_NxN;
1913 int pb_size = (1 << log2_cb_size) >> split;
1914 int side = split + 1;
/* first pass: read all prev_intra_luma_pred_flags (CABAC order) */
1918 for (i = 0; i < side; i++)
1919 for (j = 0; j < side; j++)
1920 prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
/* second pass: read mpm_idx or rem mode, then derive the luma mode */
1922 for (i = 0; i < side; i++) {
1923 for (j = 0; j < side; j++) {
1924 if (prev_intra_luma_pred_flag[2 * i + j])
1925 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1927 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1929 lc->pu.intra_pred_mode[2 * i + j] =
1930 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1931 prev_intra_luma_pred_flag[2 * i + j]);
/* 4:4:4: one chroma mode per prediction block */
1935 if (s->ps.sps->chroma_format_idc == 3) {
1936 for (i = 0; i < side; i++) {
1937 for (j = 0; j < side; j++) {
1938 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1939 if (chroma_mode != 4) {
/* colliding with the luma mode promotes chroma to angular-34 */
1940 if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
1941 lc->pu.intra_pred_mode_c[2 * i + j] = 34;
1943 lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
/* chroma_mode == 4: derive-from-luma */
1945 lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
/* 4:2:2: single chroma mode, remapped through tab_mode_idx */
1949 } else if (s->ps.sps->chroma_format_idc == 2) {
1951 lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1952 if (chroma_mode != 4) {
1953 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1956 mode_idx = intra_chroma_table[chroma_mode];
1958 mode_idx = lc->pu.intra_pred_mode[0];
1960 lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
/* remaining non-monochrome formats (e.g. 4:2:0): single chroma mode */
1961 } else if (s->ps.sps->chroma_format_idc != 0) {
1962 chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1963 if (chroma_mode != 4) {
1964 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1965 lc->pu.intra_pred_mode_c[0] = 34;
1967 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
1969 lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
/* Fill default intra state for a CU that carries no explicit intra modes
 * (skip/PCM/inter paths): write INTRA_DC into tab_ipm over the CB area
 * and, for intra CUs, mark every covered MvField as PF_INTRA. */
1974 static void intra_prediction_unit_default_value(HEVCContext *s,
1978 HEVCLocalContext *lc = s->HEVClc;
1979 int pb_size = 1 << log2_cb_size;
1980 int size_in_pus = pb_size >> s->ps.sps->log2_min_pu_size;
1981 int min_pu_width = s->ps.sps->min_pu_width;
1982 MvField *tab_mvf = s->ref->tab_mvf;
1983 int x_pu = x0 >> s->ps.sps->log2_min_pu_size;
1984 int y_pu = y0 >> s->ps.sps->log2_min_pu_size;
/* CB smaller than a min-PU: nothing to record */
1987 if (size_in_pus == 0)
1989 for (j = 0; j < size_in_pus; j++)
1990 memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
1991 if (lc->cu.pred_mode == MODE_INTRA)
1992 for (j = 0; j < size_in_pus; j++)
1993 for (k = 0; k < size_in_pus; k++)
1994 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
/* Parse and decode one coding unit at (x0, y0): skip flag, prediction
 * mode, partition mode, PCM or intra/inter prediction units, residual
 * transform tree, then QP bookkeeping and coding-tree depth recording.
 * NOTE(review): many lines (else branches, case labels, closing braces,
 * error-return paths) are elided in this extract. */
1997 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
1999 int cb_size = 1 << log2_cb_size;
2000 HEVCLocalContext *lc = s->HEVClc;
2001 int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
2002 int length = cb_size >> log2_min_cb_size;
2003 int min_cb_width = s->ps.sps->min_cb_width;
2004 int x_cb = x0 >> log2_min_cb_size;
2005 int y_cb = y0 >> log2_min_cb_size;
2006 int idx = log2_cb_size - 2;
/* mask selecting positions that complete a QP-delta group */
2007 int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
/* defaults before parsing */
2012 lc->cu.pred_mode = MODE_INTRA;
2013 lc->cu.part_mode = PART_2Nx2N;
2014 lc->cu.intra_split_flag = 0;
2016 SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2017 for (x = 0; x < 4; x++)
2018 lc->pu.intra_pred_mode[x] = 1;
2019 if (s->ps.pps->transquant_bypass_enable_flag) {
2020 lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2021 if (lc->cu.cu_transquant_bypass_flag)
2022 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2024 lc->cu.cu_transquant_bypass_flag = 0;
/* non-I slices: decode and propagate the skip flag over the CB area */
2026 if (s->sh.slice_type != I_SLICE) {
2027 uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2029 x = y_cb * min_cb_width + x_cb;
2030 for (y = 0; y < length; y++) {
2031 memset(&s->skip_flag[x], skip_flag, length);
2034 lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
/* I slices: clear the skip flags */
2036 x = y_cb * min_cb_width + x_cb;
2037 for (y = 0; y < length; y++) {
2038 memset(&s->skip_flag[x], 0, length);
/* skipped CU: merge-based prediction only, no residual */
2043 if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2044 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2045 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2047 if (!s->sh.disable_deblocking_filter_flag)
2048 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2052 if (s->sh.slice_type != I_SLICE)
2053 lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
/* part_mode is only coded for inter CUs or minimum-size CUs */
2054 if (lc->cu.pred_mode != MODE_INTRA ||
2055 log2_cb_size == s->ps.sps->log2_min_cb_size) {
2056 lc->cu.part_mode = ff_hevc_part_mode_decode(s, log2_cb_size);
2057 lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2058 lc->cu.pred_mode == MODE_INTRA;
2061 if (lc->cu.pred_mode == MODE_INTRA) {
/* PCM path: raw samples, optionally bypassing the loop filter */
2062 if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
2063 log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
2064 log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
2065 pcm_flag = ff_hevc_pcm_flag_decode(s);
2068 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2069 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2070 if (s->ps.sps->pcm.loop_filter_disable_flag)
2071 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2076 intra_prediction_unit(s, x0, y0, log2_cb_size);
/* inter CU: dispatch prediction units per partition mode */
2079 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2080 switch (lc->cu.part_mode) {
2082 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2085 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 2, log2_cb_size, 0, idx);
2086 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2089 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2090 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2093 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 4, log2_cb_size, 0, idx);
2094 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2097 hls_prediction_unit(s, x0, y0, cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2098 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size / 4, log2_cb_size, 1, idx);
2101 hls_prediction_unit(s, x0, y0, cb_size / 4, cb_size, log2_cb_size, 0, idx - 2);
2102 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2105 hls_prediction_unit(s, x0, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2106 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size / 4, cb_size, log2_cb_size, 1, idx - 2);
2109 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2110 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2111 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2112 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
/* residual: rqt_root_cbf gates the transform tree for merged inter CUs */
2118 int rqt_root_cbf = 1;
2120 if (lc->cu.pred_mode != MODE_INTRA &&
2121 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2122 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2125 const static int cbf[2] = { 0 };
2126 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2127 s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2128 s->ps.sps->max_transform_hierarchy_depth_inter;
2129 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2131 log2_cb_size, 0, 0, cbf, cbf);
2135 if (!s->sh.disable_deblocking_filter_flag)
2136 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
/* QP bookkeeping: materialize qPy if no delta was coded for this group */
2141 if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2142 ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
2144 x = y_cb * min_cb_width + x_cb;
2145 for (y = 0; y < length; y++) {
2146 memset(&s->qp_y_tab[x], lc->qp_y, length);
/* update the QP predictor when this CU ends a QP-delta group */
2150 if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2151 ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2152 lc->qPy_pred = lc->qp_y;
2155 set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
/* Recursively parse the coding quadtree rooted at (x0, y0): decode or
 * infer split_cu_flag, reset per-group QP-delta state, recurse into the
 * four sub-blocks (clipped to the picture) or decode a leaf CU, and
 * finally evaluate end_of_slice_flag at CTB-aligned positions.
 * Returns >0 to continue, 0 at end of slice, <0 on error (error paths
 * elided in this extract). */
2160 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2161 int log2_cb_size, int cb_depth)
2163 HEVCLocalContext *lc = s->HEVClc;
2164 const int cb_size = 1 << log2_cb_size;
2168 lc->ct_depth = cb_depth;
/* split_cu_flag is coded only when the whole CB fits in the picture
 * and the CB is above the minimum size; otherwise it is inferred */
2169 if (x0 + cb_size <= s->ps.sps->width &&
2170 y0 + cb_size <= s->ps.sps->height &&
2171 log2_cb_size > s->ps.sps->log2_min_cb_size) {
2172 split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2174 split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
/* new QP-delta group starts here: reset its coded state */
2176 if (s->ps.pps->cu_qp_delta_enabled_flag &&
2177 log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
2178 lc->tu.is_cu_qp_delta_coded = 0;
2179 lc->tu.cu_qp_delta = 0;
2182 if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2183 log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_chroma_qp_offset_depth) {
2184 lc->tu.is_cu_chroma_qp_offset_coded = 0;
/* split: recurse into the four quadrants that lie inside the picture */
2188 int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
2189 const int cb_size_split = cb_size >> 1;
2190 const int x1 = x0 + cb_size_split;
2191 const int y1 = y0 + cb_size_split;
2195 more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2199 if (more_data && x1 < s->ps.sps->width) {
2200 more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2204 if (more_data && y1 < s->ps.sps->height) {
2205 more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2209 if (more_data && x1 < s->ps.sps->width &&
2210 y1 < s->ps.sps->height) {
2211 more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
/* propagate the QP predictor at QP-group boundaries */
2216 if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2217 ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2218 lc->qPy_pred = lc->qp_y;
2221 return ((x1 + cb_size_split) < s->ps.sps->width ||
2222 (y1 + cb_size_split) < s->ps.sps->height);
/* leaf: decode the coding unit itself */
2226 ret = hls_coding_unit(s, x0, y0, log2_cb_size);
/* end_of_slice_flag is present at CTB-aligned / picture-edge positions */
2229 if ((!((x0 + cb_size) %
2230 (1 << (s->ps.sps->log2_ctb_size))) ||
2231 (x0 + cb_size >= s->ps.sps->width)) &&
2233 (1 << (s->ps.sps->log2_ctb_size))) ||
2234 (y0 + cb_size >= s->ps.sps->height))) {
2235 int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2236 return !end_of_slice_flag;
/* Per-CTB setup before decoding: record the slice address of this CTB,
 * compute the x/y decode limits (tile or picture edge), and derive the
 * tile/slice boundary flags plus left/up/up-left/up-right neighbour CTB
 * availability used by intra prediction and CABAC context derivation. */
2245 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2248 HEVCLocalContext *lc = s->HEVClc;
2249 int ctb_size = 1 << s->ps.sps->log2_ctb_size;
2250 int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2251 int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2253 s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2255 if (s->ps.pps->entropy_coding_sync_enabled_flag) {
/* WPP: each CTB row starts a fresh QP group */
2256 if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2257 lc->first_qp_group = 1;
2258 lc->end_of_tiles_x = s->ps.sps->width;
2259 } else if (s->ps.pps->tiles_enabled_flag) {
/* entering a new tile: recompute its right edge */
2260 if (ctb_addr_ts && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[ctb_addr_ts - 1]) {
2261 int idxX = s->ps.pps->col_idxX[x_ctb >> s->ps.sps->log2_ctb_size];
2262 lc->end_of_tiles_x = x_ctb + (s->ps.pps->column_width[idxX] << s->ps.sps->log2_ctb_size);
2263 lc->first_qp_group = 1;
2266 lc->end_of_tiles_x = s->ps.sps->width;
2269 lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
2271 lc->boundary_flags = 0;
2272 if (s->ps.pps->tiles_enabled_flag) {
2273 if (x_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2274 lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2275 if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2276 lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2277 if (y_ctb > 0 && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]])
2278 lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2279 if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->ps.sps->ctb_width])
2280 lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
/* no tiles: derive slice boundaries from the in-slice CTB index */
2282 if (ctb_addr_in_slice <= 0)
2283 lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2284 if (ctb_addr_in_slice < s->ps.sps->ctb_width)
2285 lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
/* neighbour CTBs are usable only inside the picture, the slice, and the tile */
2288 lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2289 lc->ctb_up_flag = ((y_ctb > 0) && (ctb_addr_in_slice >= s->ps.sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2290 lc->ctb_up_right_flag = ((y_ctb > 0) && (ctb_addr_in_slice+1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->ps.sps->ctb_width]]));
2291 lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0) && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]]));
/* Single-threaded slice decode entry point (run via avctx->execute):
 * walk CTBs in tile-scan order; for each one set up neighbour state,
 * init CABAC, parse SAO parameters and the coding quadtree, then run
 * the in-loop filters. NOTE(review): loop-exit/return lines are elided
 * in this extract. */
2294 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2296 HEVCContext *s = avctxt->priv_data;
2297 int ctb_size = 1 << s->ps.sps->log2_ctb_size;
2301 int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
/* a dependent slice segment cannot be the first segment of the picture */
2303 if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2304 av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2305 return AVERROR_INVALIDDATA;
2308 if (s->sh.dependent_slice_segment_flag) {
2309 int prev_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
/* dependent segments need the preceding segment to have been decoded */
2310 if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2311 av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2312 return AVERROR_INVALIDDATA;
2316 while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2317 int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2319 x_ctb = (ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2320 y_ctb = (ctb_addr_rs / ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size;
2321 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2323 ff_hevc_cabac_init(s, ctb_addr_ts);
2325 hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
/* per-CTB deblocking parameters from the slice header */
2327 s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2328 s->deblock[ctb_addr_rs].tc_offset = s->sh.tc_offset;
2329 s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag;
2331 more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2332 if (more_data < 0) {
/* mark the CTB as invalid so later segments detect the error */
2333 s->tab_slice_address[ctb_addr_rs] = -1;
2339 ff_hevc_save_states(s, ctb_addr_ts);
2340 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
/* last CTB of the picture: run the remaining filter pass */
2343 if (x_ctb + ctb_size >= s->ps.sps->width &&
2344 y_ctb + ctb_size >= s->ps.sps->height)
2345 ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
/* Decode the slice data with a single job by dispatching hls_decode_entry
 * through the avctx->execute API. NOTE(review): the declarations of arg/ret
 * and the return statement are elided in this extract. */
2350 static int hls_slice_data(HEVCContext *s)
2358 s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
/* Wavefront (WPP) worker: decode one CTB row. Each job initializes its
 * bitstream/CABAC state from the row's entry-point offset, waits on the
 * row above via ff_thread_await_progress2, and reports its own progress
 * so the next row can proceed; s1->wpp_err aborts all rows on error.
 * NOTE(review): several lines (ctb_row == 0 branch, returns) are elided
 * in this extract. */
2361 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2363 HEVCContext *s1 = avctxt->priv_data, *s;
2364 HEVCLocalContext *lc;
2365 int ctb_size = 1<< s1->ps.sps->log2_ctb_size;
2367 int *ctb_row_p = input_ctb_row;
2368 int ctb_row = ctb_row_p[job];
2369 int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->ps.sps->width + ctb_size - 1) >> s1->ps.sps->log2_ctb_size);
2370 int ctb_addr_ts = s1->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2371 int thread = ctb_row % s1->threads_number;
/* each job runs on its own cloned HEVCContext */
2374 s = s1->sList[self_id];
2378 ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2382 ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2385 while(more_data && ctb_addr_ts < s->ps.sps->ctb_size) {
2386 int x_ctb = (ctb_addr_rs % s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2387 int y_ctb = (ctb_addr_rs / s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size;
2389 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
/* wait until the row above is far enough ahead */
2391 ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
2393 if (avpriv_atomic_int_get(&s1->wpp_err)){
2394 ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2398 ff_hevc_cabac_init(s, ctb_addr_ts);
2399 hls_sao_param(s, x_ctb >> s->ps.sps->log2_ctb_size, y_ctb >> s->ps.sps->log2_ctb_size);
2400 more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0);
2402 if (more_data < 0) {
/* flag the error globally so sibling rows abort */
2403 s->tab_slice_address[ctb_addr_rs] = -1;
2404 avpriv_atomic_int_set(&s1->wpp_err, 1);
2405 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2411 ff_hevc_save_states(s, ctb_addr_ts);
2412 ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2413 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
/* premature end of a row that is not the last entry point: bitstream error */
2415 if (!more_data && (x_ctb+ctb_size) < s->ps.sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2416 avpriv_atomic_int_set(&s1->wpp_err, 1);
2417 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2421 if ((x_ctb+ctb_size) >= s->ps.sps->width && (y_ctb+ctb_size) >= s->ps.sps->height ) {
2422 ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2423 ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2426 ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
/* stop when the row crosses the right picture edge */
2429 if(x_ctb >= s->ps.sps->width) {
2433 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
/* WPP driver: validate the entry-point layout, clone per-thread contexts,
 * translate entry_point_offset[] into byte offsets/sizes in the NAL
 * payload (compensating for skipped emulation-prevention bytes via
 * nal->skipped_bytes_pos), then launch one hls_decode_entry_wpp job per
 * CTB row through avctx->execute2.
 * NOTE(review): cleanup/return lines are elided in this extract. */
2438 static int hls_slice_data_wpp(HEVCContext *s, const HEVCNAL *nal)
2440 const uint8_t *data = nal->data;
2441 int length = nal->size;
2442 HEVCLocalContext *lc = s->HEVClc;
2443 int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2444 int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2446 int startheader, cmpt = 0;
2452 return AVERROR(ENOMEM);
/* reject an entry-point count that would address CTBs past the picture */
2455 if (s->sh.slice_ctb_addr_rs + s->sh.num_entry_point_offsets * s->ps.sps->ctb_width >= s->ps.sps->ctb_width * s->ps.sps->ctb_height) {
2456 av_log(s->avctx, AV_LOG_ERROR, "WPP ctb addresses are wrong (%d %d %d %d)\n",
2457 s->sh.slice_ctb_addr_rs, s->sh.num_entry_point_offsets,
2458 s->ps.sps->ctb_width, s->ps.sps->ctb_height
2460 res = AVERROR_INVALIDDATA;
2464 ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
/* lazily create the clone contexts for the extra worker threads */
2467 for (i = 1; i < s->threads_number; i++) {
2468 s->sList[i] = av_malloc(sizeof(HEVCContext));
2469 memcpy(s->sList[i], s, sizeof(HEVCContext));
2470 s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2471 s->sList[i]->HEVClc = s->HEVClcList[i];
/* byte position just after the slice header */
2475 offset = (lc->gb.index >> 3);
/* count emulation-prevention bytes skipped before the first entry point */
2477 for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) {
2478 if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2484 for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2485 offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2486 for (j = 0, cmpt = 0, startheader = offset
2487 + s->sh.entry_point_offset[i]; j < nal->skipped_bytes; j++) {
2488 if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
2493 s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2494 s->sh.offset[i - 1] = offset;
/* last entry point: its size is whatever remains of the NAL */
2497 if (s->sh.num_entry_point_offsets != 0) {
2498 offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2499 if (length < offset) {
2500 av_log(s->avctx, AV_LOG_ERROR, "entry_point_offset table is corrupted\n");
2501 res = AVERROR_INVALIDDATA;
2504 s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2505 s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
/* refresh the clones with the current main-context state */
2510 for (i = 1; i < s->threads_number; i++) {
2511 s->sList[i]->HEVClc->first_qp_group = 1;
2512 s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2513 memcpy(s->sList[i], s, sizeof(HEVCContext));
2514 s->sList[i]->HEVClc = s->HEVClcList[i];
2517 avpriv_atomic_int_set(&s->wpp_err, 0);
2518 ff_reset_entries(s->avctx);
2520 for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2525 if (s->ps.pps->entropy_coding_sync_enabled_flag)
2526 s->avctx->execute2(s->avctx, hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2528 for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
/* Attach SEI-derived side data to the output frame: frame-packing SEI
 * becomes AVStereo3D, display-orientation SEI becomes a display matrix,
 * and A53 closed captions become AV_FRAME_DATA_A53_CC. Returns 0 or
 * AVERROR(ENOMEM). */
2536 static int set_side_data(HEVCContext *s)
2538 AVFrame *out = s->ref->frame;
/* only packing types 3..5 with interpretation 1..2 are representable */
2540 if (s->sei_frame_packing_present &&
2541 s->frame_packing_arrangement_type >= 3 &&
2542 s->frame_packing_arrangement_type <= 5 &&
2543 s->content_interpretation_type > 0 &&
2544 s->content_interpretation_type < 3) {
2545 AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2547 return AVERROR(ENOMEM);
2549 switch (s->frame_packing_arrangement_type) {
2551 if (s->quincunx_subsampling)
2552 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2554 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2557 stereo->type = AV_STEREO3D_TOPBOTTOM;
2560 stereo->type = AV_STEREO3D_FRAMESEQUENCE;
/* interpretation type 2 means right view first */
2564 if (s->content_interpretation_type == 2)
2565 stereo->flags = AV_STEREO3D_FLAG_INVERT;
2568 if (s->sei_display_orientation_present &&
2569 (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
/* SEI rotation is in 1/65536 of a turn; convert to degrees */
2570 double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2571 AVFrameSideData *rotation = av_frame_new_side_data(out,
2572 AV_FRAME_DATA_DISPLAYMATRIX,
2573 sizeof(int32_t) * 9);
2575 return AVERROR(ENOMEM);
2577 av_display_rotation_set((int32_t *)rotation->data, angle);
2578 av_display_matrix_flip((int32_t *)rotation->data,
2579 s->sei_hflip, s->sei_vflip);
2582 if (s->a53_caption) {
2583 AVFrameSideData* sd = av_frame_new_side_data(out,
2584 AV_FRAME_DATA_A53_CC,
2585 s->a53_caption_size);
2587 memcpy(sd->data, s->a53_caption, s->a53_caption_size);
/* caption buffer is consumed; drop it so it is not re-attached */
2588 av_freep(&s->a53_caption);
2589 s->a53_caption_size = 0;
2590 s->avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
/* Start decoding a new frame: clear the per-picture tables (boundary
 * strengths, cbf, pcm, slice addresses), acquire a new reference frame,
 * build the frame RPS, attach side data, and push any frame that is now
 * ready for output. On failure falls through to unref the new frame.
 * NOTE(review): some lines (error labels, returns) are elided in this
 * extract. */
2596 static int hevc_frame_start(HEVCContext *s)
2598 HEVCLocalContext *lc = s->HEVClc;
2599 int pic_size_in_ctb = ((s->ps.sps->width >> s->ps.sps->log2_min_cb_size) + 1) *
2600 ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
2603 memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2604 memset(s->vertical_bs, 0, s->bs_width * s->bs_height);
2605 memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
2606 memset(s->is_pcm, 0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
/* -1 marks CTBs not yet decoded by any slice */
2607 memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2610 s->first_nal_type = s->nal_unit_type;
2612 if (s->ps.pps->tiles_enabled_flag)
2613 lc->end_of_tiles_x = s->ps.pps->column_width[0] << s->ps.sps->log2_ctb_size;
2615 ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2619 ret = ff_hevc_frame_rps(s);
2621 av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2625 s->ref->frame->key_frame = IS_IRAP(s);
2627 ret = set_side_data(s);
2631 s->frame->pict_type = 3 - s->sh.slice_type;
2634 ff_hevc_bump_frame(s);
2636 av_frame_unref(s->output_frame);
2637 ret = ff_hevc_output_frame(s, s->output_frame, 0);
/* hwaccel performs its own setup, so only signal it here otherwise */
2641 if (!s->avctx->hwaccel)
2642 ff_thread_finish_setup(s->avctx);
/* failure path: release the newly acquired reference frame */
2648 ff_hevc_unref_frame(s, s->ref, ~0);
/* Decode one NAL unit: parameter sets (VPS/SPS/PPS) and SEI are parsed
 * into s->ps / SEI state; slice NALs parse the slice header, start a new
 * frame when needed, build reference lists, and run the slice data
 * (hwaccel, WPP, or sequential path). NOTE(review): many case labels and
 * intermediate lines are elided in this extract. */
2653 static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
2655 HEVCLocalContext *lc = s->HEVClc;
2656 GetBitContext *gb = &lc->gb;
2657 int ctb_addr_ts, ret;
2660 s->nal_unit_type = nal->type;
2661 s->temporal_id = nal->temporal_id;
2663 switch (s->nal_unit_type) {
2665 ret = ff_hevc_decode_nal_vps(gb, s->avctx, &s->ps);
2670 ret = ff_hevc_decode_nal_sps(gb, s->avctx, &s->ps,
2671 s->apply_defdispwin);
2676 ret = ff_hevc_decode_nal_pps(gb, s->avctx, &s->ps);
2680 case NAL_SEI_PREFIX:
2681 case NAL_SEI_SUFFIX:
2682 ret = ff_hevc_decode_nal_sei(s);
2693 case NAL_BLA_W_RADL:
2695 case NAL_IDR_W_RADL:
2702 ret = hls_slice_header(s);
2706 if (s->max_ra == INT_MAX) {
2707 if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2711 s->max_ra = INT_MIN;
/* RASL pictures that precede the recovery point are skipped */
2715 if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2716 s->poc <= s->max_ra) {
2720 if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2721 s->max_ra = INT_MIN;
2724 if (s->sh.first_slice_in_pic_flag) {
2725 ret = hevc_frame_start(s);
2728 } else if (!s->ref) {
2729 av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
/* all VCL NALUs of one picture must share the same NAL type */
2733 if (s->nal_unit_type != s->first_nal_type) {
2734 av_log(s->avctx, AV_LOG_ERROR,
2735 "Non-matching NAL types of the VCL NALUs: %d %d\n",
2736 s->first_nal_type, s->nal_unit_type);
2737 return AVERROR_INVALIDDATA;
2740 if (!s->sh.dependent_slice_segment_flag &&
2741 s->sh.slice_type != I_SLICE) {
2742 ret = ff_hevc_slice_rpl(s);
2744 av_log(s->avctx, AV_LOG_WARNING,
2745 "Error constructing the reference lists for the current slice.\n");
2750 if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2751 ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2756 if (s->avctx->hwaccel) {
2757 ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
/* software path: WPP when threads and entry points are available */
2761 if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2762 ctb_addr_ts = hls_slice_data_wpp(s, nal);
2764 ctb_addr_ts = hls_slice_data(s);
2765 if (ctb_addr_ts >= (s->ps.sps->ctb_width * s->ps.sps->ctb_height)) {
2769 if (ctb_addr_ts < 0) {
/* EOS/EOB: start a new coded video sequence */
2777 s->seq_decode = (s->seq_decode + 1) & 0xff;
2778 s->max_ra = INT_MAX;
2784 av_log(s->avctx, AV_LOG_INFO,
2785 "Skipping NAL unit %d\n", s->nal_unit_type);
2790 if (s->avctx->err_recognition & AV_EF_EXPLODE)
/* Split the input packet into NAL units and decode them in order; a
 * decode error on one NAL is only fatal with AV_EF_EXPLODE. Finally
 * report full progress on the current frame for frame threading. */
2795 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2800 s->last_eos = s->eos;
2803 /* split the input packet into NAL units, so we know the upper bound on the
2804 * number of slices in the frame */
2805 ret = ff_hevc_split_packet(s, &s->pkt, buf, length, s->avctx, s->is_nalff,
2806 s->nal_length_size);
2808 av_log(s->avctx, AV_LOG_ERROR,
2809 "Error splitting the input into NAL units.\n");
/* pre-scan for end-of-sequence / end-of-bitstream NALs */
2813 for (i = 0; i < s->pkt.nb_nals; i++) {
2814 if (s->pkt.nals[i].type == NAL_EOB_NUT ||
2815 s->pkt.nals[i].type == NAL_EOS_NUT)
2819 /* decode the NAL units */
2820 for (i = 0; i < s->pkt.nb_nals; i++) {
2821 ret = decode_nal_unit(s, &s->pkt.nals[i]);
2823 av_log(s->avctx, AV_LOG_WARNING,
2824 "Error parsing NAL unit #%d.\n", i);
/* unblock any frame-threaded consumers waiting on this frame */
2830 if (s->ref && s->threads_type == FF_THREAD_FRAME)
2831 ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
/* Log a 16-byte MD5 digest as 32 lowercase hex characters (no newline). */
2836 static void print_md5(void *log_ctx, int level, uint8_t md5[16])
2839 for (i = 0; i < 16; i++)
2840 av_log(log_ctx, level, "%02"PRIx8, md5[i]);
/* Verify the decoded frame against the MD5 checksums carried in the
 * picture-hash SEI (s->md5): compute each plane's MD5 row by row,
 * byteswapping >8bpp samples into a scratch buffer first (the SEI hashes
 * are little-endian). Returns AVERROR_INVALIDDATA on mismatch. */
2843 static int verify_md5(HEVCContext *s, AVFrame *frame)
2845 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
2850 return AVERROR(EINVAL);
/* 1 when samples are wider than 8 bits (2 bytes per sample) */
2852 pixel_shift = desc->comp[0].depth > 8;
2854 av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
2857 /* the checksums are LE, so we have to byteswap for >8bpp formats
/* scratch buffer for the byteswapped rows, sized to the widest plane */
2860 if (pixel_shift && !s->checksum_buf) {
2861 av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
2862 FFMAX3(frame->linesize[0], frame->linesize[1],
2863 frame->linesize[2]));
2864 if (!s->checksum_buf)
2865 return AVERROR(ENOMEM);
2869 for (i = 0; frame->data[i]; i++) {
2870 int width = s->avctx->coded_width;
2871 int height = s->avctx->coded_height;
/* chroma planes are subsampled per the pixel format descriptor */
2872 int w = (i == 1 || i == 2) ? (width >> desc->log2_chroma_w) : width;
2873 int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
2876 av_md5_init(s->md5_ctx);
2877 for (j = 0; j < h; j++) {
2878 const uint8_t *src = frame->data[i] + j * frame->linesize[i];
2881 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
2882 (const uint16_t *) src, w);
2883 src = s->checksum_buf;
2886 av_md5_update(s->md5_ctx, src, w << pixel_shift);
2888 av_md5_final(s->md5_ctx, md5);
2890 if (!memcmp(md5, s->md5[i], 16)) {
2891 av_log (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
2892 print_md5(s->avctx, AV_LOG_DEBUG, md5);
2893 av_log (s->avctx, AV_LOG_DEBUG, "; ");
2895 av_log (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
2896 print_md5(s->avctx, AV_LOG_ERROR, md5);
2897 av_log (s->avctx, AV_LOG_ERROR, " != ");
2898 print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
2899 av_log (s->avctx, AV_LOG_ERROR, "\n");
2900 return AVERROR_INVALIDDATA;
2904 av_log(s->avctx, AV_LOG_DEBUG, "\n");
/* Codec entry point for one packet: an empty packet flushes buffered
 * output frames; otherwise decode the NAL units, finish any hwaccel
 * picture, optionally verify the SEI MD5, and hand a completed frame to
 * the caller. NOTE(review): several lines (flush branch details, returns)
 * are elided in this extract. */
2909 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
2913 HEVCContext *s = avctx->priv_data;
/* flush path: drain buffered frames when the packet is empty */
2916 ret = ff_hevc_output_frame(s, data, 1);
2925 ret = decode_nal_units(s, avpkt->data, avpkt->size);
2929 if (avctx->hwaccel) {
2930 if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) {
2931 av_log(avctx, AV_LOG_ERROR,
2932 "hardware accelerator failed to decode picture\n");
2933 ff_hevc_unref_frame(s, s->ref, ~0);
2937 /* verify the SEI checksum */
2938 if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
2940 ret = verify_md5(s, s->ref->frame);
2941 if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
2942 ff_hevc_unref_frame(s, s->ref, ~0);
2949 if (s->is_decoded) {
2950 av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
2954 if (s->output_frame->buf[0]) {
2955 av_frame_move_ref(data, s->output_frame);
/* Create dst as a new reference to src: ref-count the frame buffer and
 * all auxiliary buffers (tab_mvf, rpl_tab, rpl, hwaccel private data)
 * and copy the scalar metadata. On any allocation failure the partially
 * built dst is unreffed and AVERROR(ENOMEM) is returned. */
2962 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
2966 ret = ff_thread_ref_frame(&dst->tf, &src->tf);
2970 dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
2971 if (!dst->tab_mvf_buf)
2973 dst->tab_mvf = src->tab_mvf;
2975 dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
2976 if (!dst->rpl_tab_buf)
2978 dst->rpl_tab = src->rpl_tab;
2980 dst->rpl_buf = av_buffer_ref(src->rpl_buf);
2984 dst->poc = src->poc;
2985 dst->ctb_count = src->ctb_count;
2986 dst->window = src->window;
2987 dst->flags = src->flags;
2988 dst->sequence = src->sequence;
2990 if (src->hwaccel_picture_private) {
2991 dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
2992 if (!dst->hwaccel_priv_buf)
2994 dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
/* shared failure path: release whatever was already referenced */
2999 ff_hevc_unref_frame(s, dst, ~0);
3000 return AVERROR(ENOMEM);
/* Codec close: free every allocation made by hevc_init_context and
 * during decoding — MD5 context, CABAC state, SAO buffers, output frame,
 * DPB entries, parameter-set lists, slice-header arrays, per-thread
 * contexts, and the NAL packet buffers. Also called from
 * hevc_init_context's failure path, so it must tolerate NULL members. */
3003 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3005 HEVCContext *s = avctx->priv_data;
3010 av_freep(&s->md5_ctx);
3012 av_freep(&s->cabac_state);
3014 for (i = 0; i < 3; i++) {
3015 av_freep(&s->sao_pixel_buffer_h[i]);
3016 av_freep(&s->sao_pixel_buffer_v[i]);
3018 av_frame_free(&s->output_frame);
3020 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3021 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3022 av_frame_free(&s->DPB[i].frame);
3025 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++)
3026 av_buffer_unref(&s->ps.vps_list[i]);
3027 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++)
3028 av_buffer_unref(&s->ps.sps_list[i]);
3029 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++)
3030 av_buffer_unref(&s->ps.pps_list[i]);
3035 av_freep(&s->sh.entry_point_offset);
3036 av_freep(&s->sh.offset);
3037 av_freep(&s->sh.size);
/* worker-thread clones allocated by hls_slice_data_wpp */
3039 for (i = 1; i < s->threads_number; i++) {
3040 HEVCLocalContext *lc = s->HEVClcList[i];
3042 av_freep(&s->HEVClcList[i]);
3043 av_freep(&s->sList[i]);
/* HEVClc aliases HEVClcList[0]; avoid a dangling pointer after free */
3046 if (s->HEVClc == s->HEVClcList[0])
3048 av_freep(&s->HEVClcList[0]);
3050 for (i = 0; i < s->pkt.nals_allocated; i++) {
3051 av_freep(&s->pkt.nals[i].rbsp_buffer);
3052 av_freep(&s->pkt.nals[i].skipped_bytes_pos);
3054 av_freep(&s->pkt.nals);
3055 s->pkt.nals_allocated = 0;
/* Allocate everything the decoder needs that does not depend on stream
 * parameters: the main local context, CABAC state, the reusable output
 * frame, one AVFrame per DPB slot, and the MD5 context.
 * On any allocation failure it falls through to the cleanup tail (the
 * `fail:` label and the intervening `goto fail` error checks are elided
 * in this view) which frees partial state and returns ENOMEM.
 * NOTE(review): `int i;` and several `if (!...) goto fail;` lines are not
 * visible here. */
3060 static av_cold int hevc_init_context(AVCodecContext *avctx)
3062 HEVCContext *s = avctx->priv_data;
3067 s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
/* Slot 0 of the per-thread list aliases the main local context. */
3070 s->HEVClcList[0] = s->HEVClc;
3073 s->cabac_state = av_malloc(HEVC_CONTEXTS);
3074 if (!s->cabac_state)
3077 s->output_frame = av_frame_alloc();
3078 if (!s->output_frame)
/* One AVFrame per DPB slot; tf.f wires the frame into the
 * ThreadFrame used for frame-threaded progress reporting. */
3081 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3082 s->DPB[i].frame = av_frame_alloc();
3083 if (!s->DPB[i].frame)
3085 s->DPB[i].tf.f = s->DPB[i].frame;
3088 s->max_ra = INT_MAX;
3090 s->md5_ctx = av_md5_alloc();
3094 ff_bswapdsp_init(&s->bdsp);
3096 s->context_initialized = 1;
/* Error tail (label elided): undo partial allocations, report OOM. */
3102 hevc_decode_free(avctx);
3103 return AVERROR(ENOMEM);
/* Frame-threading synchronization callback: copy decoding state from the
 * source thread's context (s0) into this thread's context (s) before this
 * thread starts decoding its frame.  Re-references (rather than copies)
 * all DPB frames and parameter-set buffers.
 * NOTE(review): several lines are elided in this view — `int i, ret;`,
 * the early-return error checks after hevc_init_context()/hevc_ref_frame(),
 * and the condition guarding the sequence-reset at the bottom; comments
 * cover only the visible code. */
3106 static int hevc_update_thread_context(AVCodecContext *dst,
3107                                       const AVCodecContext *src)
3109 HEVCContext *s = dst->priv_data;
3110 HEVCContext *s0 = src->priv_data;
/* First call on this thread copy: build the base context lazily. */
3113 if (!s->context_initialized) {
3114 ret = hevc_init_context(dst);
/* Mirror the source DPB: drop our old refs, then re-ref every slot the
 * source thread currently holds a frame in. */
3119 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3120 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3121 if (s0->DPB[i].frame->buf[0]) {
3122 ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
/* NOTE(review): the statement under this SPS-changed check is elided —
 * presumably it invalidates s->ps.sps before the lists are re-referenced;
 * confirm against the full file. */
3128 if (s->ps.sps != s0->ps.sps)
/* Re-reference the three parameter-set lists from the source thread. */
3130 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
3131 av_buffer_unref(&s->ps.vps_list[i]);
3132 if (s0->ps.vps_list[i]) {
3133 s->ps.vps_list[i] = av_buffer_ref(s0->ps.vps_list[i]);
3134 if (!s->ps.vps_list[i])
3135 return AVERROR(ENOMEM);
3139 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3140 av_buffer_unref(&s->ps.sps_list[i]);
3141 if (s0->ps.sps_list[i]) {
3142 s->ps.sps_list[i] = av_buffer_ref(s0->ps.sps_list[i]);
3143 if (!s->ps.sps_list[i])
3144 return AVERROR(ENOMEM);
3148 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
3149 av_buffer_unref(&s->ps.pps_list[i]);
3150 if (s0->ps.pps_list[i]) {
3151 s->ps.pps_list[i] = av_buffer_ref(s0->ps.pps_list[i]);
3152 if (!s->ps.pps_list[i])
3153 return AVERROR(ENOMEM);
/* If the active SPS changed, re-derive all SPS-dependent state. */
3157 if (s->ps.sps != s0->ps.sps)
3158 if ((ret = set_sps(s, s0->ps.sps, src->pix_fmt)) < 0)
/* Plain scalar state carried across threads. */
3161 s->seq_decode = s0->seq_decode;
3162 s->seq_output = s0->seq_output;
3163 s->pocTid0 = s0->pocTid0;
3164 s->max_ra = s0->max_ra;
3166 s->no_rasl_output_flag = s0->no_rasl_output_flag;
3168 s->is_nalff = s0->is_nalff;
3169 s->nal_length_size = s0->nal_length_size;
3171 s->threads_number = s0->threads_number;
3172 s->threads_type = s0->threads_type;
/* Sequence reset (guard condition elided — presumably triggered by an
 * end-of-sequence on the source thread; confirm): bump the 8-bit
 * sequence counter and re-arm RASL suppression. */
3175 s->seq_decode = (s->seq_decode + 1) & 0xff;
3176 s->max_ra = INT_MAX;
/* Parse codec extradata at init time.  Two layouts are supported:
 *  - hvcC (ISO/IEC 14496-15): length-prefixed NAL arrays — detected by
 *    the first bytes NOT looking like an Annex B start code;
 *  - raw Annex B: fed straight to decode_nal_units().
 * Afterwards, stream parameters from the first available SPS are exported
 * to the AVCodecContext.
 * NOTE(review): `int i, ret;`, a `GetByteContext gb;` declaration, error
 * checks after decode_nal_units(), and several closing braces are elided
 * in this view. */
3182 static int hevc_decode_extradata(HEVCContext *s)
3184 AVCodecContext *avctx = s->avctx;
3188 bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
/* hvcC detection: real Annex B starts 00 00 01 / 00 00 00 01, so any
 * nonzero first two bytes, or a third byte > 1, means hvcC. */
3190 if (avctx->extradata_size > 3 &&
3191     (avctx->extradata[0] || avctx->extradata[1] ||
3192      avctx->extradata[2] > 1)) {
3193 /* It seems the extradata is encoded as hvcC format.
3194  * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3195  * is finalized. When finalized, configurationVersion will be 1 and we
3196  * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3197 int i, j, num_arrays, nal_len_size;
/* Skip the fixed 21-byte head of HEVCDecoderConfigurationRecord; byte 22
 * carries lengthSizeMinusOne in its low 2 bits. */
3201 bytestream2_skip(&gb, 21);
3202 nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3203 num_arrays = bytestream2_get_byte(&gb);
3205 /* nal units in the hvcC always have length coded with 2 bytes,
3206  * so put a fake nal_length_size = 2 while parsing them */
3207 s->nal_length_size = 2;
3209 /* Decode nal units from hvcC. */
3210 for (i = 0; i < num_arrays; i++) {
3211 int type = bytestream2_get_byte(&gb) & 0x3f;
3212 int cnt = bytestream2_get_be16(&gb);
3214 for (j = 0; j < cnt; j++) {
3215 // +2 for the nal size field
3216 int nalsize = bytestream2_peek_be16(&gb) + 2;
/* Bounds check before handing the NAL to the parser — extradata is
 * untrusted input. */
3217 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3218 av_log(s->avctx, AV_LOG_ERROR,
3219        "Invalid NAL unit size in extradata.\n");
3220 return AVERROR_INVALIDDATA;
3223 ret = decode_nal_units(s, gb.buffer, nalsize);
3225 av_log(avctx, AV_LOG_ERROR,
3226        "Decoding nal unit %d %d from hvcC failed\n",
3230 bytestream2_skip(&gb, nalsize);
3234 /* Now store right nal length size, that will be used to parse
/* (continuation of the comment above is elided in this view) */
3236 s->nal_length_size = nal_len_size;
/* Annex B branch: whole extradata is a raw NAL stream. */
3239 ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3244 /* export stream parameters from the first SPS */
3245 for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
3246 if (s->ps.sps_list[i]) {
3247 const HEVCSPS *sps = (const HEVCSPS*)s->ps.sps_list[i]->data;
3248 export_stream_params(s->avctx, &s->ps, sps);
/* AVCodec.init callback: build the context, configure threading counts,
 * and parse any container-provided extradata (SPS/PPS/VPS) up front.
 * NOTE(review): `int ret;`, the `else` keywords, and early-return error
 * checks are elided in this view. */
3256 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3258 HEVCContext *s = avctx->priv_data;
/* Frame threading needs per-frame progress reporting. */
3261 avctx->internal->allocate_progress = 1;
3263 ret = hevc_init_context(avctx);
3267 s->enable_parallel_tiles = 0;
3268 s->picture_struct = 0;
/* Slice threading uses the user-requested thread count; otherwise run
 * the slice path single-threaded. */
3271 if(avctx->active_thread_type & FF_THREAD_SLICE)
3272 s->threads_number = avctx->thread_count;
3274 s->threads_number = 1;
3276 if (avctx->extradata_size > 0 && avctx->extradata) {
3277 ret = hevc_decode_extradata(s);
/* On extradata failure (check elided) everything allocated so far is
 * torn down before returning the error. */
3279 hevc_decode_free(avctx);
/* Prefer frame threading over slice threading when both are possible. */
3284 if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3285 s->threads_type = FF_THREAD_FRAME;
3287 s->threads_type = FF_THREAD_SLICE;
/* AVCodec.init_thread_copy callback: each frame-threading worker gets a
 * memcpy'd HEVCContext; wipe it and rebuild per-thread allocations from
 * scratch so no pointers are shared with the parent context.
 * NOTE(review): `int ret;` and the trailing return are elided here. */
3292 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3294 HEVCContext *s = avctx->priv_data;
3297 memset(s, 0, sizeof(*s));
3299 ret = hevc_init_context(avctx);
/* AVCodec.flush callback (seek/discontinuity): empty the DPB and re-arm
 * RASL-picture suppression until the next random-access point. */
3306 static void hevc_decode_flush(AVCodecContext *avctx)
3308 HEVCContext *s = avctx->priv_data;
3309 ff_hevc_flush_dpb(s);
3310 s->max_ra = INT_MAX;
/* AVOption helpers: field offset into HEVCContext, and the common
 * decoding/video option flags. */
3314 #define OFFSET(x) offsetof(HEVCContext, x)
3315 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
/* Profiles advertised via AVCodec.profiles; FF_PROFILE_UNKNOWN terminates
 * the list.  (Closing brace of the array is elided in this view.) */
3317 static const AVProfile profiles[] = {
3318 { FF_PROFILE_HEVC_MAIN, "Main" },
3319 { FF_PROFILE_HEVC_MAIN_10, "Main 10" },
3320 { FF_PROFILE_HEVC_MAIN_STILL_PICTURE, "Main Still Picture" },
3321 { FF_PROFILE_HEVC_REXT, "Rext" },
3322 { FF_PROFILE_UNKNOWN },
/* User-visible decoder options.  Note both entries write the same field
 * (apply_defdispwin); "strict-displaywin" is the stricter alias.  The
 * NULL terminator and closing brace are elided in this view. */
3325 static const AVOption options[] = {
3326 { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3327 AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
3328 { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3329 AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
/* AVClass wiring the options table above into the decoder's private
 * context for av_opt_* access.  (The `.option = options,` line and the
 * closing brace are elided in this view.) */
3333 static const AVClass hevc_decoder_class = {
3334 .class_name = "HEVC decoder",
3335 .item_name  = av_default_item_name,
3337 .version    = LIBAVUTIL_VERSION_INT,
3340 AVCodec ff_hevc_decoder = {
3342 .long_name = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3343 .type = AVMEDIA_TYPE_VIDEO,
3344 .id = AV_CODEC_ID_HEVC,
3345 .priv_data_size = sizeof(HEVCContext),
3346 .priv_class = &hevc_decoder_class,
3347 .init = hevc_decode_init,
3348 .close = hevc_decode_free,
3349 .decode = hevc_decode_frame,
3350 .flush = hevc_decode_flush,
3351 .update_thread_context = hevc_update_thread_context,
3352 .init_thread_copy = hevc_init_thread_copy,
3353 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
3354 AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_FRAME_THREADS,
3355 .profiles = NULL_IF_CONFIG_SMALL(profiles),