git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c

   1 /*
   2  * HEVC video Decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of FFmpeg.
  10  *
  11  * FFmpeg is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * FFmpeg is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with FFmpeg; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/atomic.h"
  27 #include "libavutil/attributes.h"
  28 #include "libavutil/common.h"
  29 #include "libavutil/display.h"
  30 #include "libavutil/internal.h"
  31 #include "libavutil/md5.h"
  32 #include "libavutil/opt.h"
  33 #include "libavutil/pixdesc.h"
  34 #include "libavutil/stereo3d.h"
  35
  36 #include "bswapdsp.h"
  37 #include "bytestream.h"
  38 #include "cabac_functions.h"
  39 #include "golomb.h"
  40 #include "hevc.h"
  41
  42 const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
  43
  44 /**
  45  * NOTE: Each function hls_foo correspond to the function foo in the
  46  * specification (HLS stands for High Level Syntax).
  47  */
  48
  49 /**
  50  * Section 5.7
  51  */
  52
  53 /* free everything allocated  by pic_arrays_init() */
  54 static void pic_arrays_free(HEVCContext *s)
  55 {
  56     av_freep(&s->sao);
  57     av_freep(&s->deblock);
  58
  59     av_freep(&s->skip_flag);
  60     av_freep(&s->tab_ct_depth);
  61
  62     av_freep(&s->tab_ipm);
  63     av_freep(&s->cbf_luma);
  64     av_freep(&s->is_pcm);
  65
  66     av_freep(&s->qp_y_tab);
  67     av_freep(&s->tab_slice_address);
  68     av_freep(&s->filter_slice_edges);
  69
  70     av_freep(&s->horizontal_bs);
  71     av_freep(&s->vertical_bs);
  72
  73     av_freep(&s->sh.entry_point_offset);
  74     av_freep(&s->sh.size);
  75     av_freep(&s->sh.offset);
  76
  77     av_buffer_pool_uninit(&s->tab_mvf_pool);
  78     av_buffer_pool_uninit(&s->rpl_tab_pool);
  79 }
  80
  81 /* allocate arrays that depend on frame dimensions */
  82 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
  83 {
  84     int log2_min_cb_size = sps->log2_min_cb_size;
  85     int width            = sps->width;
  86     int height           = sps->height;
  87     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
  88                            ((height >> log2_min_cb_size) + 1);
  89     int ctb_count        = sps->ctb_width * sps->ctb_height;
  90     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
  91
  92     s->bs_width  = (width  >> 2) + 1;
  93     s->bs_height = (height >> 2) + 1;
  94
  95     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
  96     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
  97     if (!s->sao || !s->deblock)
  98         goto fail;
  99
 100     s->skip_flag    = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
 101     s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
 102     if (!s->skip_flag || !s->tab_ct_depth)
 103         goto fail;
 104
 105     s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
 106     s->tab_ipm  = av_mallocz(min_pu_size);
 107     s->is_pcm   = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
 108     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 109         goto fail;
 110
 111     s->filter_slice_edges = av_mallocz(ctb_count);
 112     s->tab_slice_address  = av_malloc_array(pic_size_in_ctb,
 113                                       sizeof(*s->tab_slice_address));
 114     s->qp_y_tab           = av_malloc_array(pic_size_in_ctb,
 115                                       sizeof(*s->qp_y_tab));
 116     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 117         goto fail;
 118
 119     s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
 120     s->vertical_bs   = av_mallocz_array(s->bs_width, s->bs_height);
 121     if (!s->horizontal_bs || !s->vertical_bs)
 122         goto fail;
 123
 124     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 125                                           av_buffer_allocz);
 126     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 127                                           av_buffer_allocz);
 128     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 129         goto fail;
 130
 131     return 0;
 132
 133 fail:
 134     pic_arrays_free(s);
 135     return AVERROR(ENOMEM);
 136 }
 137
 138 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 139 {
 140     int i = 0;
 141     int j = 0;
 142     uint8_t luma_weight_l0_flag[16];
 143     uint8_t chroma_weight_l0_flag[16];
 144     uint8_t luma_weight_l1_flag[16];
 145     uint8_t chroma_weight_l1_flag[16];
 146     int luma_log2_weight_denom;
 147
 148     luma_log2_weight_denom = get_ue_golomb_long(gb);
 149     if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7)
 150         av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
 151     s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
 152     if (s->sps->chroma_format_idc != 0) {
 153         int delta = get_se_golomb(gb);
 154         s->sh.chroma_log2_weight_denom = av_clip_uintp2(s->sh.luma_log2_weight_denom + delta, 3);
 155     }
 156
 157     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 158         luma_weight_l0_flag[i] = get_bits1(gb);
 159         if (!luma_weight_l0_flag[i]) {
 160             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 161             s->sh.luma_offset_l0[i] = 0;
 162         }
 163     }
 164     if (s->sps->chroma_format_idc != 0) {
 165         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 166             chroma_weight_l0_flag[i] = get_bits1(gb);
 167     } else {
 168         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 169             chroma_weight_l0_flag[i] = 0;
 170     }
 171     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 172         if (luma_weight_l0_flag[i]) {
 173             int delta_luma_weight_l0 = get_se_golomb(gb);
 174             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 175             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 176         }
 177         if (chroma_weight_l0_flag[i]) {
 178             for (j = 0; j < 2; j++) {
 179                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 180                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 181                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 182                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 183                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 184             }
 185         } else {
 186             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 187             s->sh.chroma_offset_l0[i][0] = 0;
 188             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 189             s->sh.chroma_offset_l0[i][1] = 0;
 190         }
 191     }
 192     if (s->sh.slice_type == B_SLICE) {
 193         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 194             luma_weight_l1_flag[i] = get_bits1(gb);
 195             if (!luma_weight_l1_flag[i]) {
 196                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 197                 s->sh.luma_offset_l1[i] = 0;
 198             }
 199         }
 200         if (s->sps->chroma_format_idc != 0) {
 201             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 202                 chroma_weight_l1_flag[i] = get_bits1(gb);
 203         } else {
 204             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 205                 chroma_weight_l1_flag[i] = 0;
 206         }
 207         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 208             if (luma_weight_l1_flag[i]) {
 209                 int delta_luma_weight_l1 = get_se_golomb(gb);
 210                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 211                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 212             }
 213             if (chroma_weight_l1_flag[i]) {
 214                 for (j = 0; j < 2; j++) {
 215                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 216                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 217                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 218                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 219                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 220                 }
 221             } else {
 222                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 223                 s->sh.chroma_offset_l1[i][0] = 0;
 224                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 225                 s->sh.chroma_offset_l1[i][1] = 0;
 226             }
 227         }
 228     }
 229 }
 230
 231 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 232 {
 233     const HEVCSPS *sps = s->sps;
 234     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 235     int prev_delta_msb = 0;
 236     unsigned int nb_sps = 0, nb_sh;
 237     int i;
 238
 239     rps->nb_refs = 0;
 240     if (!sps->long_term_ref_pics_present_flag)
 241         return 0;
 242
 243     if (sps->num_long_term_ref_pics_sps > 0)
 244         nb_sps = get_ue_golomb_long(gb);
 245     nb_sh = get_ue_golomb_long(gb);
 246
 247     if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
 248         return AVERROR_INVALIDDATA;
 249
 250     rps->nb_refs = nb_sh + nb_sps;
 251
 252     for (i = 0; i < rps->nb_refs; i++) {
 253         uint8_t delta_poc_msb_present;
 254
 255         if (i < nb_sps) {
 256             uint8_t lt_idx_sps = 0;
 257
 258             if (sps->num_long_term_ref_pics_sps > 1)
 259                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 260
 261             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 262             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 263         } else {
 264             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 265             rps->used[i] = get_bits1(gb);
 266         }
 267
 268         delta_poc_msb_present = get_bits1(gb);
 269         if (delta_poc_msb_present) {
 270             int delta = get_ue_golomb_long(gb);
 271
 272             if (i && i != nb_sps)
 273                 delta += prev_delta_msb;
 274
 275             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 276             prev_delta_msb = delta;
 277         }
 278     }
 279
 280     return 0;
 281 }
 282
 283 static void export_stream_params(AVCodecContext *avctx,
 284                                  const HEVCContext *s, const HEVCSPS *sps)
 285 {
 286     const HEVCVPS *vps = (const HEVCVPS*)s->vps_list[sps->vps_id]->data;
 287     unsigned int num = 0, den = 0;
 288
 289     avctx->pix_fmt             = sps->pix_fmt;
 290     avctx->coded_width         = sps->width;
 291     avctx->coded_height        = sps->height;
 292     avctx->width               = sps->output_width;
 293     avctx->height              = sps->output_height;
 294     avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 295     avctx->profile             = sps->ptl.general_ptl.profile_idc;
 296     avctx->level               = sps->ptl.general_ptl.level_idc;
 297
 298     ff_set_sar(avctx, sps->vui.sar);
 299
 300     if (sps->vui.video_signal_type_present_flag)
 301         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 302                                                             : AVCOL_RANGE_MPEG;
 303     else
 304         avctx->color_range = AVCOL_RANGE_MPEG;
 305
 306     if (sps->vui.colour_description_present_flag) {
 307         avctx->color_primaries = sps->vui.colour_primaries;
 308         avctx->color_trc       = sps->vui.transfer_characteristic;
 309         avctx->colorspace      = sps->vui.matrix_coeffs;
 310     } else {
 311         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 312         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 313         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 314     }
 315
 316     if (vps->vps_timing_info_present_flag) {
 317         num = vps->vps_num_units_in_tick;
 318         den = vps->vps_time_scale;
 319     } else if (sps->vui.vui_timing_info_present_flag) {
 320         num = sps->vui.vui_num_units_in_tick;
 321         den = sps->vui.vui_time_scale;
 322     }
 323
 324     if (num != 0 && den != 0)
 325         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
 326                   num, den, 1 << 30);
 327 }
 328
 329 static int set_sps(HEVCContext *s, const HEVCSPS *sps, enum AVPixelFormat pix_fmt)
 330 {
 331     #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL)
 332     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
 333     int ret, i;
 334
 335     export_stream_params(s->avctx, s, sps);
 336
 337     pic_arrays_free(s);
 338     ret = pic_arrays_init(s, sps);
 339     if (ret < 0)
 340         goto fail;
 341
 342     if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
 343 #if CONFIG_HEVC_DXVA2_HWACCEL
 344         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
 345 #endif
 346     }
 347
 348     if (pix_fmt == AV_PIX_FMT_NONE) {
 349         *fmt++ = sps->pix_fmt;
 350         *fmt = AV_PIX_FMT_NONE;
 351
 352         ret = ff_thread_get_format(s->avctx, pix_fmts);
 353         if (ret < 0)
 354             goto fail;
 355         s->avctx->pix_fmt = ret;
 356     }
 357     else {
 358         s->avctx->pix_fmt = pix_fmt;
 359     }
 360
 361     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 362     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 363     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 364
 365     for (i = 0; i < 3; i++) {
 366         av_freep(&s->sao_pixel_buffer_h[i]);
 367         av_freep(&s->sao_pixel_buffer_v[i]);
 368     }
 369
 370     if (sps->sao_enabled && !s->avctx->hwaccel) {
 371         int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
 372         int c_idx;
 373
 374         for(c_idx = 0; c_idx < c_count; c_idx++) {
 375             int w = sps->width >> sps->hshift[c_idx];
 376             int h = sps->height >> sps->vshift[c_idx];
 377             s->sao_pixel_buffer_h[c_idx] =
 378                 av_malloc((w * 2 * sps->ctb_height) <<
 379                           sps->pixel_shift);
 380             s->sao_pixel_buffer_v[c_idx] =
 381                 av_malloc((h * 2 * sps->ctb_width) <<
 382                           sps->pixel_shift);
 383         }
 384     }
 385
 386     s->sps = sps;
 387     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
 388
 389     return 0;
 390
 391 fail:
 392     pic_arrays_free(s);
 393     s->sps = NULL;
 394     return ret;
 395 }
 396
 397 static int hls_slice_header(HEVCContext *s)
 398 {
 399     GetBitContext *gb = &s->HEVClc->gb;
 400     SliceHeader *sh   = &s->sh;
 401     int i, j, ret;
 402
 403     // Coded parameters
 404     sh->first_slice_in_pic_flag = get_bits1(gb);
 405     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 406         s->seq_decode = (s->seq_decode + 1) & 0xff;
 407         s->max_ra     = INT_MAX;
 408         if (IS_IDR(s))
 409             ff_hevc_clear_refs(s);
 410     }
 411     sh->no_output_of_prior_pics_flag = 0;
 412     if (IS_IRAP(s))
 413         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 414
 415     sh->pps_id = get_ue_golomb_long(gb);
 416     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
 417         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 418         return AVERROR_INVALIDDATA;
 419     }
 420     if (!sh->first_slice_in_pic_flag &&
 421         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
 422         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 423         return AVERROR_INVALIDDATA;
 424     }
 425     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
 426     if (s->nal_unit_type == NAL_CRA_NUT && s->last_eos == 1)
 427         sh->no_output_of_prior_pics_flag = 1;
 428
 429     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
 430         const HEVCSPS* last_sps = s->sps;
 431         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
 432         if (last_sps && IS_IRAP(s) && s->nal_unit_type != NAL_CRA_NUT) {
 433             if (s->sps->width !=  last_sps->width || s->sps->height != last_sps->height ||
 434                 s->sps->temporal_layer[s->sps->max_sub_layers - 1].max_dec_pic_buffering !=
 435                 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
 436                 sh->no_output_of_prior_pics_flag = 0;
 437         }
 438         ff_hevc_clear_refs(s);
 439         ret = set_sps(s, s->sps, AV_PIX_FMT_NONE);
 440         if (ret < 0)
 441             return ret;
 442
 443         s->seq_decode = (s->seq_decode + 1) & 0xff;
 444         s->max_ra     = INT_MAX;
 445     }
 446
 447     sh->dependent_slice_segment_flag = 0;
 448     if (!sh->first_slice_in_pic_flag) {
 449         int slice_address_length;
 450
 451         if (s->pps->dependent_slice_segments_enabled_flag)
 452             sh->dependent_slice_segment_flag = get_bits1(gb);
 453
 454         slice_address_length = av_ceil_log2(s->sps->ctb_width *
 455                                             s->sps->ctb_height);
 456         sh->slice_segment_addr = get_bits(gb, slice_address_length);
 457         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
 458             av_log(s->avctx, AV_LOG_ERROR,
 459                    "Invalid slice segment address: %u.\n",
 460                    sh->slice_segment_addr);
 461             return AVERROR_INVALIDDATA;
 462         }
 463
 464         if (!sh->dependent_slice_segment_flag) {
 465             sh->slice_addr = sh->slice_segment_addr;
 466             s->slice_idx++;
 467         }
 468     } else {
 469         sh->slice_segment_addr = sh->slice_addr = 0;
 470         s->slice_idx           = 0;
 471         s->slice_initialized   = 0;
 472     }
 473
 474     if (!sh->dependent_slice_segment_flag) {
 475         s->slice_initialized = 0;
 476
 477         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
 478             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 479
 480         sh->slice_type = get_ue_golomb_long(gb);
 481         if (!(sh->slice_type == I_SLICE ||
 482               sh->slice_type == P_SLICE ||
 483               sh->slice_type == B_SLICE)) {
 484             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 485                    sh->slice_type);
 486             return AVERROR_INVALIDDATA;
 487         }
 488         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
 489             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 490             return AVERROR_INVALIDDATA;
 491         }
 492
 493         // when flag is not present, picture is inferred to be output
 494         sh->pic_output_flag = 1;
 495         if (s->pps->output_flag_present_flag)
 496             sh->pic_output_flag = get_bits1(gb);
 497
 498         if (s->sps->separate_colour_plane_flag)
 499             sh->colour_plane_id = get_bits(gb, 2);
 500
 501         if (!IS_IDR(s)) {
 502             int poc;
 503
 504             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
 505             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
 506             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 507                 av_log(s->avctx, AV_LOG_WARNING,
 508                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 509                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 510                     return AVERROR_INVALIDDATA;
 511                 poc = s->poc;
 512             }
 513             s->poc = poc;
 514
 515             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
 516             if (!sh->short_term_ref_pic_set_sps_flag) {
 517                 int pos = get_bits_left(gb);
 518                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
 519                 if (ret < 0)
 520                     return ret;
 521
 522                 sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
 523                 sh->short_term_rps = &sh->slice_rps;
 524             } else {
 525                 int numbits, rps_idx;
 526
 527                 if (!s->sps->nb_st_rps) {
 528                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 529                     return AVERROR_INVALIDDATA;
 530                 }
 531
 532                 numbits = av_ceil_log2(s->sps->nb_st_rps);
 533                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 534                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
 535             }
 536
 537             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 538             if (ret < 0) {
 539                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 540                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 541                     return AVERROR_INVALIDDATA;
 542             }
 543
 544             if (s->sps->sps_temporal_mvp_enabled_flag)
 545                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 546             else
 547                 sh->slice_temporal_mvp_enabled_flag = 0;
 548         } else {
 549             s->sh.short_term_rps = NULL;
 550             s->poc               = 0;
 551         }
 552
 553         /* 8.3.1 */
 554         if (s->temporal_id == 0 &&
 555             s->nal_unit_type != NAL_TRAIL_N &&
 556             s->nal_unit_type != NAL_TSA_N   &&
 557             s->nal_unit_type != NAL_STSA_N  &&
 558             s->nal_unit_type != NAL_RADL_N  &&
 559             s->nal_unit_type != NAL_RADL_R  &&
 560             s->nal_unit_type != NAL_RASL_N  &&
 561             s->nal_unit_type != NAL_RASL_R)
 562             s->pocTid0 = s->poc;
 563
 564         if (s->sps->sao_enabled) {
 565             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 566             if (s->sps->chroma_format_idc) {
 567                 sh->slice_sample_adaptive_offset_flag[1] =
 568                 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 569             }
 570         } else {
 571             sh->slice_sample_adaptive_offset_flag[0] = 0;
 572             sh->slice_sample_adaptive_offset_flag[1] = 0;
 573             sh->slice_sample_adaptive_offset_flag[2] = 0;
 574         }
 575
 576         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 577         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
 578             int nb_refs;
 579
 580             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
 581             if (sh->slice_type == B_SLICE)
 582                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
 583
 584             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 585                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 586                 if (sh->slice_type == B_SLICE)
 587                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 588             }
 589             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
 590                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 591                        sh->nb_refs[L0], sh->nb_refs[L1]);
 592                 return AVERROR_INVALIDDATA;
 593             }
 594
 595             sh->rpl_modification_flag[0] = 0;
 596             sh->rpl_modification_flag[1] = 0;
 597             nb_refs = ff_hevc_frame_nb_refs(s);
 598             if (!nb_refs) {
 599                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 600                 return AVERROR_INVALIDDATA;
 601             }
 602
 603             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
 604                 sh->rpl_modification_flag[0] = get_bits1(gb);
 605                 if (sh->rpl_modification_flag[0]) {
 606                     for (i = 0; i < sh->nb_refs[L0]; i++)
 607                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 608                 }
 609
 610                 if (sh->slice_type == B_SLICE) {
 611                     sh->rpl_modification_flag[1] = get_bits1(gb);
 612                     if (sh->rpl_modification_flag[1] == 1)
 613                         for (i = 0; i < sh->nb_refs[L1]; i++)
 614                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 615                 }
 616             }
 617
 618             if (sh->slice_type == B_SLICE)
 619                 sh->mvd_l1_zero_flag = get_bits1(gb);
 620
 621             if (s->pps->cabac_init_present_flag)
 622                 sh->cabac_init_flag = get_bits1(gb);
 623             else
 624                 sh->cabac_init_flag = 0;
 625
 626             sh->collocated_ref_idx = 0;
 627             if (sh->slice_temporal_mvp_enabled_flag) {
 628                 sh->collocated_list = L0;
 629                 if (sh->slice_type == B_SLICE)
 630                     sh->collocated_list = !get_bits1(gb);
 631
 632                 if (sh->nb_refs[sh->collocated_list] > 1) {
 633                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 634                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 635                         av_log(s->avctx, AV_LOG_ERROR,
 636                                "Invalid collocated_ref_idx: %d.\n",
 637                                sh->collocated_ref_idx);
 638                         return AVERROR_INVALIDDATA;
 639                     }
 640                 }
 641             }
 642
 643             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
 644                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
 645                 pred_weight_table(s, gb);
 646             }
 647
 648             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 649             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 650                 av_log(s->avctx, AV_LOG_ERROR,
 651                        "Invalid number of merging MVP candidates: %d.\n",
 652                        sh->max_num_merge_cand);
 653                 return AVERROR_INVALIDDATA;
 654             }
 655         }
 656
 657         sh->slice_qp_delta = get_se_golomb(gb);
 658
 659         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 660             sh->slice_cb_qp_offset = get_se_golomb(gb);
 661             sh->slice_cr_qp_offset = get_se_golomb(gb);
 662         } else {
 663             sh->slice_cb_qp_offset = 0;
 664             sh->slice_cr_qp_offset = 0;
 665         }
 666
 667         if (s->pps->chroma_qp_offset_list_enabled_flag)
 668             sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
 669         else
 670             sh->cu_chroma_qp_offset_enabled_flag = 0;
 671
 672         if (s->pps->deblocking_filter_control_present_flag) {
 673             int deblocking_filter_override_flag = 0;
 674
 675             if (s->pps->deblocking_filter_override_enabled_flag)
 676                 deblocking_filter_override_flag = get_bits1(gb);
 677
 678             if (deblocking_filter_override_flag) {
 679                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 680                 if (!sh->disable_deblocking_filter_flag) {
 681                     sh->beta_offset = get_se_golomb(gb) * 2;
 682                     sh->tc_offset   = get_se_golomb(gb) * 2;
 683                 }
 684             } else {
 685                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
 686                 sh->beta_offset                    = s->pps->beta_offset;
 687                 sh->tc_offset                      = s->pps->tc_offset;
 688             }
 689         } else {
 690             sh->disable_deblocking_filter_flag = 0;
 691             sh->beta_offset                    = 0;
 692             sh->tc_offset                      = 0;
 693         }
 694
 695         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
 696             (sh->slice_sample_adaptive_offset_flag[0] ||
 697              sh->slice_sample_adaptive_offset_flag[1] ||
 698              !sh->disable_deblocking_filter_flag)) {
 699             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 700         } else {
 701             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
 702         }
 703     } else if (!s->slice_initialized) {
 704         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 705         return AVERROR_INVALIDDATA;
 706     }
 707
 708     sh->num_entry_point_offsets = 0;
 709     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
 710         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
 711         if (sh->num_entry_point_offsets > 0) {
 712             int offset_len = get_ue_golomb_long(gb) + 1;
 713             int segments = offset_len >> 4;
 714             int rest = (offset_len & 15);
 715             av_freep(&sh->entry_point_offset);
 716             av_freep(&sh->offset);
 717             av_freep(&sh->size);
 718             sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 719             sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 720             sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 721             if (!sh->entry_point_offset || !sh->offset || !sh->size) {
 722                 sh->num_entry_point_offsets = 0;
 723                 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
 724                 return AVERROR(ENOMEM);
 725             }
 726             for (i = 0; i < sh->num_entry_point_offsets; i++) {
 727                 int val = 0;
 728                 for (j = 0; j < segments; j++) {
 729                     val <<= 16;
 730                     val += get_bits(gb, 16);
 731                 }
 732                 if (rest) {
 733                     val <<= rest;
 734                     val += get_bits(gb, rest);
 735                 }
 736                 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
 737             }
 738             if (s->threads_number > 1 && (s->pps->num_tile_rows > 1 || s->pps->num_tile_columns > 1)) {
 739                 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
 740                 s->threads_number = 1;
 741             } else
 742                 s->enable_parallel_tiles = 0;
 743         } else
 744             s->enable_parallel_tiles = 0;
 745     }
 746
 747     if (s->pps->slice_header_extension_present_flag) {
 748         unsigned int length = get_ue_golomb_long(gb);
 749         if (length*8LL > get_bits_left(gb)) {
 750             av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
 751             return AVERROR_INVALIDDATA;
 752         }
 753         for (i = 0; i < length; i++)
 754             skip_bits(gb, 8);  // slice_header_extension_data_byte
 755     }
 756
 757     // Inferred parameters
 758     sh->slice_qp = 26U + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 759     if (sh->slice_qp > 51 ||
 760         sh->slice_qp < -s->sps->qp_bd_offset) {
 761         av_log(s->avctx, AV_LOG_ERROR,
 762                "The slice_qp %d is outside the valid range "
 763                "[%d, 51].\n",
 764                sh->slice_qp,
 765                -s->sps->qp_bd_offset);
 766         return AVERROR_INVALIDDATA;
 767     }
 768
 769     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 770
 771     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
 772         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
 773         return AVERROR_INVALIDDATA;
 774     }
 775
 776     if (get_bits_left(gb) < 0) {
 777         av_log(s->avctx, AV_LOG_ERROR,
 778                "Overread slice header by %d bits\n", -get_bits_left(gb));
 779         return AVERROR_INVALIDDATA;
 780     }
 781
 782     s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
 783
 784     if (!s->pps->cu_qp_delta_enabled_flag)
 785         s->HEVClc->qp_y = s->sh.slice_qp;
 786
 787     s->slice_initialized = 1;
 788     s->HEVClc->tu.cu_qp_offset_cb = 0;
 789     s->HEVClc->tu.cu_qp_offset_cr = 0;
 790
 791     return 0;
 792 }
 793
 794 #define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
 795
 796 #define SET_SAO(elem, value)                            \
 797 do {                                                    \
 798     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 799         sao->elem = value;                              \
 800     else if (sao_merge_left_flag)                       \
 801         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 802     else if (sao_merge_up_flag)                         \
 803         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 804     else                                                \
 805         sao->elem = 0;                                  \
 806 } while (0)
 807
 808 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 809 {
 810     HEVCLocalContext *lc    = s->HEVClc;
 811     int sao_merge_left_flag = 0;
 812     int sao_merge_up_flag   = 0;
 813     SAOParams *sao          = &CTB(s->sao, rx, ry);
 814     int c_idx, i;
 815
 816     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 817         s->sh.slice_sample_adaptive_offset_flag[1]) {
 818         if (rx > 0) {
 819             if (lc->ctb_left_flag)
 820                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 821         }
 822         if (ry > 0 && !sao_merge_left_flag) {
 823             if (lc->ctb_up_flag)
 824                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 825         }
 826     }
 827
 828     for (c_idx = 0; c_idx < (s->sps->chroma_format_idc ? 3 : 1); c_idx++) {
 829         int log2_sao_offset_scale = c_idx == 0 ? s->pps->log2_sao_offset_scale_luma :
 830                                                  s->pps->log2_sao_offset_scale_chroma;
 831
 832         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 833             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 834             continue;
 835         }
 836
 837         if (c_idx == 2) {
 838             sao->type_idx[2] = sao->type_idx[1];
 839             sao->eo_class[2] = sao->eo_class[1];
 840         } else {
 841             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 842         }
 843
 844         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 845             continue;
 846
 847         for (i = 0; i < 4; i++)
 848             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 849
 850         if (sao->type_idx[c_idx] == SAO_BAND) {
 851             for (i = 0; i < 4; i++) {
 852                 if (sao->offset_abs[c_idx][i]) {
 853                     SET_SAO(offset_sign[c_idx][i],
 854                             ff_hevc_sao_offset_sign_decode(s));
 855                 } else {
 856                     sao->offset_sign[c_idx][i] = 0;
 857                 }
 858             }
 859             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 860         } else if (c_idx != 2) {
 861             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 862         }
 863
 864         // Inferred parameters
 865         sao->offset_val[c_idx][0] = 0;
 866         for (i = 0; i < 4; i++) {
 867             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
 868             if (sao->type_idx[c_idx] == SAO_EDGE) {
 869                 if (i > 1)
 870                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 871             } else if (sao->offset_sign[c_idx][i]) {
 872                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 873             }
 874             sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale;
 875         }
 876     }
 877 }
 878
 879 #undef SET_SAO
 880 #undef CTB
 881
 882 static int hls_cross_component_pred(HEVCContext *s, int idx) {
 883     HEVCLocalContext *lc    = s->HEVClc;
 884     int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
 885
 886     if (log2_res_scale_abs_plus1 !=  0) {
 887         int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
 888         lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
 889                                (1 - 2 * res_scale_sign_flag);
 890     } else {
 891         lc->tu.res_scale_val = 0;
 892     }
 893
 894
 895     return 0;
 896 }
 897
 898 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
 899                               int xBase, int yBase, int cb_xBase, int cb_yBase,
 900                               int log2_cb_size, int log2_trafo_size,
 901                               int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
 902 {
 903     HEVCLocalContext *lc = s->HEVClc;
 904     const int log2_trafo_size_c = log2_trafo_size - s->sps->hshift[1];
 905     int i;
 906
 907     if (lc->cu.pred_mode == MODE_INTRA) {
 908         int trafo_size = 1 << log2_trafo_size;
 909         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
 910
 911         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
 912     }
 913
 914     if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
 915         (s->sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
 916         int scan_idx   = SCAN_DIAG;
 917         int scan_idx_c = SCAN_DIAG;
 918         int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
 919                          (s->sps->chroma_format_idc == 2 &&
 920                          (cbf_cb[1] || cbf_cr[1]));
 921
 922         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
 923             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
 924             if (lc->tu.cu_qp_delta != 0)
 925                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
 926                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
 927             lc->tu.is_cu_qp_delta_coded = 1;
 928
 929             if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
 930                 lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
 931                 av_log(s->avctx, AV_LOG_ERROR,
 932                        "The cu_qp_delta %d is outside the valid range "
 933                        "[%d, %d].\n",
 934                        lc->tu.cu_qp_delta,
 935                        -(26 + s->sps->qp_bd_offset / 2),
 936                         (25 + s->sps->qp_bd_offset / 2));
 937                 return AVERROR_INVALIDDATA;
 938             }
 939
 940             ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
 941         }
 942
 943         if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
 944             !lc->cu.cu_transquant_bypass_flag  &&  !lc->tu.is_cu_chroma_qp_offset_coded) {
 945             int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
 946             if (cu_chroma_qp_offset_flag) {
 947                 int cu_chroma_qp_offset_idx  = 0;
 948                 if (s->pps->chroma_qp_offset_list_len_minus1 > 0) {
 949                     cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
 950                     av_log(s->avctx, AV_LOG_ERROR,
 951                         "cu_chroma_qp_offset_idx not yet tested.\n");
 952                 }
 953                 lc->tu.cu_qp_offset_cb = s->pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
 954                 lc->tu.cu_qp_offset_cr = s->pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
 955             } else {
 956                 lc->tu.cu_qp_offset_cb = 0;
 957                 lc->tu.cu_qp_offset_cr = 0;
 958             }
 959             lc->tu.is_cu_chroma_qp_offset_coded = 1;
 960         }
 961
 962         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
 963             if (lc->tu.intra_pred_mode >= 6 &&
 964                 lc->tu.intra_pred_mode <= 14) {
 965                 scan_idx = SCAN_VERT;
 966             } else if (lc->tu.intra_pred_mode >= 22 &&
 967                        lc->tu.intra_pred_mode <= 30) {
 968                 scan_idx = SCAN_HORIZ;
 969             }
 970
 971             if (lc->tu.intra_pred_mode_c >=  6 &&
 972                 lc->tu.intra_pred_mode_c <= 14) {
 973                 scan_idx_c = SCAN_VERT;
 974             } else if (lc->tu.intra_pred_mode_c >= 22 &&
 975                        lc->tu.intra_pred_mode_c <= 30) {
 976                 scan_idx_c = SCAN_HORIZ;
 977             }
 978         }
 979
 980         lc->tu.cross_pf = 0;
 981
 982         if (cbf_luma)
 983             ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
 984         if (s->sps->chroma_format_idc && (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3)) {
 985             int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
 986             int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
 987             lc->tu.cross_pf  = (s->pps->cross_component_prediction_enabled_flag && cbf_luma &&
 988                                 (lc->cu.pred_mode == MODE_INTER ||
 989                                  (lc->tu.chroma_mode_c ==  4)));
 990
 991             if (lc->tu.cross_pf) {
 992                 hls_cross_component_pred(s, 0);
 993             }
 994             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
 995                 if (lc->cu.pred_mode == MODE_INTRA) {
 996                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
 997                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
 998                 }
 999                 if (cbf_cb[i])
1000                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1001                                                 log2_trafo_size_c, scan_idx_c, 1);
1002                 else
1003                     if (lc->tu.cross_pf) {
1004                         ptrdiff_t stride = s->frame->linesize[1];
1005                         int hshift = s->sps->hshift[1];
1006                         int vshift = s->sps->vshift[1];
1007                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1008                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1009                         int size = 1 << log2_trafo_size_c;
1010
1011                         uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1012                                                               ((x0 >> hshift) << s->sps->pixel_shift)];
1013                         for (i = 0; i < (size * size); i++) {
1014                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1015                         }
1016                         s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1017                     }
1018             }
1019
1020             if (lc->tu.cross_pf) {
1021                 hls_cross_component_pred(s, 1);
1022             }
1023             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1024                 if (lc->cu.pred_mode == MODE_INTRA) {
1025                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1026                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1027                 }
1028                 if (cbf_cr[i])
1029                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1030                                                 log2_trafo_size_c, scan_idx_c, 2);
1031                 else
1032                     if (lc->tu.cross_pf) {
1033                         ptrdiff_t stride = s->frame->linesize[2];
1034                         int hshift = s->sps->hshift[2];
1035                         int vshift = s->sps->vshift[2];
1036                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1037                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1038                         int size = 1 << log2_trafo_size_c;
1039
1040                         uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1041                                                           ((x0 >> hshift) << s->sps->pixel_shift)];
1042                         for (i = 0; i < (size * size); i++) {
1043                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1044                         }
1045                         s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1046                     }
1047             }
1048         } else if (s->sps->chroma_format_idc && blk_idx == 3) {
1049             int trafo_size_h = 1 << (log2_trafo_size + 1);
1050             int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
1051             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1052                 if (lc->cu.pred_mode == MODE_INTRA) {
1053                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1054                                                     trafo_size_h, trafo_size_v);
1055                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1056                 }
1057                 if (cbf_cb[i])
1058                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1059                                                 log2_trafo_size, scan_idx_c, 1);
1060             }
1061             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1062                 if (lc->cu.pred_mode == MODE_INTRA) {
1063                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1064                                                 trafo_size_h, trafo_size_v);
1065                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1066                 }
1067                 if (cbf_cr[i])
1068                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1069                                                 log2_trafo_size, scan_idx_c, 2);
1070             }
1071         }
1072     } else if (s->sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
1073         if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
1074             int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
1075             int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
1076             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1077             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1078             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1079             if (s->sps->chroma_format_idc == 2) {
1080                 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1081                                                 trafo_size_h, trafo_size_v);
1082                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1083                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1084             }
1085         } else if (blk_idx == 3) {
1086             int trafo_size_h = 1 << (log2_trafo_size + 1);
1087             int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
1088             ff_hevc_set_neighbour_available(s, xBase, yBase,
1089                                             trafo_size_h, trafo_size_v);
1090             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1091             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1092             if (s->sps->chroma_format_idc == 2) {
1093                 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1094                                                 trafo_size_h, trafo_size_v);
1095                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1096                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
1097             }
1098         }
1099     }
1100
1101     return 0;
1102 }
1103
1104 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1105 {
1106     int cb_size          = 1 << log2_cb_size;
1107     int log2_min_pu_size = s->sps->log2_min_pu_size;
1108
1109     int min_pu_width     = s->sps->min_pu_width;
1110     int x_end = FFMIN(x0 + cb_size, s->sps->width);
1111     int y_end = FFMIN(y0 + cb_size, s->sps->height);
1112     int i, j;
1113
1114     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1115         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1116             s->is_pcm[i + j * min_pu_width] = 2;
1117 }
1118
1119 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1120                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1121                               int log2_cb_size, int log2_trafo_size,
1122                               int trafo_depth, int blk_idx,
1123                               const int *base_cbf_cb, const int *base_cbf_cr)
1124 {
1125     HEVCLocalContext *lc = s->HEVClc;
1126     uint8_t split_transform_flag;
1127     int cbf_cb[2];
1128     int cbf_cr[2];
1129     int ret;
1130
1131     cbf_cb[0] = base_cbf_cb[0];
1132     cbf_cb[1] = base_cbf_cb[1];
1133     cbf_cr[0] = base_cbf_cr[0];
1134     cbf_cr[1] = base_cbf_cr[1];
1135
1136     if (lc->cu.intra_split_flag) {
1137         if (trafo_depth == 1) {
1138             lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[blk_idx];
1139             if (s->sps->chroma_format_idc == 3) {
1140                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1141                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[blk_idx];
1142             } else {
1143                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1144                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1145             }
1146         }
1147     } else {
1148         lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[0];
1149         lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1150         lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1151     }
1152
1153     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1154         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1155         trafo_depth     < lc->cu.max_trafo_depth       &&
1156         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1157         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1158     } else {
1159         int inter_split = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1160                           lc->cu.pred_mode == MODE_INTER &&
1161                           lc->cu.part_mode != PART_2Nx2N &&
1162                           trafo_depth == 0;
1163
1164         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1165                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1166                                inter_split;
1167     }
1168
1169     if (s->sps->chroma_format_idc && (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3)) {
1170         if (trafo_depth == 0 || cbf_cb[0]) {
1171             cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1172             if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1173                 cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1174             }
1175         }
1176
1177         if (trafo_depth == 0 || cbf_cr[0]) {
1178             cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1179             if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1180                 cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1181             }
1182         }
1183     }
1184
1185     if (split_transform_flag) {
1186         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1187         const int x1 = x0 + trafo_size_split;
1188         const int y1 = y0 + trafo_size_split;
1189
1190 #define SUBDIVIDE(x, y, idx)                                                    \
1191 do {                                                                            \
1192     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1193                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1194                              cbf_cb, cbf_cr);                                   \
1195     if (ret < 0)                                                                \
1196         return ret;                                                             \
1197 } while (0)
1198
1199         SUBDIVIDE(x0, y0, 0);
1200         SUBDIVIDE(x1, y0, 1);
1201         SUBDIVIDE(x0, y1, 2);
1202         SUBDIVIDE(x1, y1, 3);
1203
1204 #undef SUBDIVIDE
1205     } else {
1206         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1207         int log2_min_tu_size = s->sps->log2_min_tb_size;
1208         int min_tu_width     = s->sps->min_tb_width;
1209         int cbf_luma         = 1;
1210
1211         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1212             cbf_cb[0] || cbf_cr[0] ||
1213             (s->sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1214             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1215         }
1216
1217         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1218                                  log2_cb_size, log2_trafo_size,
1219                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1220         if (ret < 0)
1221             return ret;
1222         // TODO: store cbf_luma somewhere else
1223         if (cbf_luma) {
1224             int i, j;
1225             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1226                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1227                     int x_tu = (x0 + j) >> log2_min_tu_size;
1228                     int y_tu = (y0 + i) >> log2_min_tu_size;
1229                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1230                 }
1231         }
1232         if (!s->sh.disable_deblocking_filter_flag) {
1233             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1234             if (s->pps->transquant_bypass_enable_flag &&
1235                 lc->cu.cu_transquant_bypass_flag)
1236                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1237         }
1238     }
1239     return 0;
1240 }
1241
1242 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1243 {
1244     HEVCLocalContext *lc = s->HEVClc;
1245     GetBitContext gb;
1246     int cb_size   = 1 << log2_cb_size;
1247     int stride0   = s->frame->linesize[0];
1248     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1249     int   stride1 = s->frame->linesize[1];
1250     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1251     int   stride2 = s->frame->linesize[2];
1252     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1253
1254     int length         = cb_size * cb_size * s->sps->pcm.bit_depth +
1255                          (((cb_size >> s->sps->hshift[1]) * (cb_size >> s->sps->vshift[1])) +
1256                           ((cb_size >> s->sps->hshift[2]) * (cb_size >> s->sps->vshift[2]))) *
1257                           s->sps->pcm.bit_depth_chroma;
1258     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1259     int ret;
1260
1261     if (!s->sh.disable_deblocking_filter_flag)
1262         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1263
1264     ret = init_get_bits(&gb, pcm, length);
1265     if (ret < 0)
1266         return ret;
1267
1268     s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size,     &gb, s->sps->pcm.bit_depth);
1269     if (s->sps->chroma_format_idc) {
1270         s->hevcdsp.put_pcm(dst1, stride1,
1271                            cb_size >> s->sps->hshift[1],
1272                            cb_size >> s->sps->vshift[1],
1273                            &gb, s->sps->pcm.bit_depth_chroma);
1274         s->hevcdsp.put_pcm(dst2, stride2,
1275                            cb_size >> s->sps->hshift[2],
1276                            cb_size >> s->sps->vshift[2],
1277                            &gb, s->sps->pcm.bit_depth_chroma);
1278     }
1279
1280     return 0;
1281 }
1282
1283 /**
1284  * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1285  *
1286  * @param s HEVC decoding context
1287  * @param dst target buffer for block data at block position
1288  * @param dststride stride of the dst buffer
1289  * @param ref reference picture buffer at origin (0, 0)
1290  * @param mv motion vector (relative to block position) to get pixel data from
1291  * @param x_off horizontal position of block from origin (0, 0)
1292  * @param y_off vertical position of block from origin (0, 0)
1293  * @param block_w width of block
1294  * @param block_h height of block
1295  * @param luma_weight weighting factor applied to the luma prediction
1296  * @param luma_offset additive offset applied to the luma prediction value
1297  */
1298
1299 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1300                         AVFrame *ref, const Mv *mv, int x_off, int y_off,
1301                         int block_w, int block_h, int luma_weight, int luma_offset)
1302 {
1303     HEVCLocalContext *lc = s->HEVClc;
1304     uint8_t *src         = ref->data[0];
1305     ptrdiff_t srcstride  = ref->linesize[0];
1306     int pic_width        = s->sps->width;
1307     int pic_height       = s->sps->height;
1308     int mx               = mv->x & 3;
1309     int my               = mv->y & 3;
1310     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1311                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1312     int idx              = ff_hevc_pel_weight[block_w];
1313
1314     x_off += mv->x >> 2;
1315     y_off += mv->y >> 2;
1316     src   += y_off * srcstride + x_off * (1 << s->sps->pixel_shift);
1317
1318     if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1319         x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1320         y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1321         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1322         int offset     = QPEL_EXTRA_BEFORE * srcstride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1323         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1324
1325         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1326                                  edge_emu_stride, srcstride,
1327                                  block_w + QPEL_EXTRA,
1328                                  block_h + QPEL_EXTRA,
1329                                  x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1330                                  pic_width, pic_height);
1331         src = lc->edge_emu_buffer + buf_offset;
1332         srcstride = edge_emu_stride;
1333     }
1334
1335     if (!weight_flag)
1336         s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1337                                                       block_h, mx, my, block_w);
1338     else
1339         s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1340                                                         block_h, s->sh.luma_log2_weight_denom,
1341                                                         luma_weight, luma_offset, mx, my, block_w);
1342 }
1343
1344 /**
1345  * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1346  *
1347  * @param s HEVC decoding context
1348  * @param dst target buffer for block data at block position
1349  * @param dststride stride of the dst buffer
1350  * @param ref0 reference picture0 buffer at origin (0, 0)
1351  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1352  * @param x_off horizontal position of block from origin (0, 0)
1353  * @param y_off vertical position of block from origin (0, 0)
1354  * @param block_w width of block
1355  * @param block_h height of block
1356  * @param ref1 reference picture1 buffer at origin (0, 0)
1357  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1358  * @param current_mv current motion vector structure
1359  */
1360  static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1361                        AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1362                        int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1363 {
1364     HEVCLocalContext *lc = s->HEVClc;
1365     ptrdiff_t src0stride  = ref0->linesize[0];
1366     ptrdiff_t src1stride  = ref1->linesize[0];
1367     int pic_width        = s->sps->width;
1368     int pic_height       = s->sps->height;
1369     int mx0              = mv0->x & 3;
1370     int my0              = mv0->y & 3;
1371     int mx1              = mv1->x & 3;
1372     int my1              = mv1->y & 3;
1373     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1374                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1375     int x_off0           = x_off + (mv0->x >> 2);
1376     int y_off0           = y_off + (mv0->y >> 2);
1377     int x_off1           = x_off + (mv1->x >> 2);
1378     int y_off1           = y_off + (mv1->y >> 2);
1379     int idx              = ff_hevc_pel_weight[block_w];
1380
1381     uint8_t *src0  = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
1382     uint8_t *src1  = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
1383
1384     if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1385         x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1386         y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1387         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1388         int offset     = QPEL_EXTRA_BEFORE * src0stride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1389         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1390
1391         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1392                                  edge_emu_stride, src0stride,
1393                                  block_w + QPEL_EXTRA,
1394                                  block_h + QPEL_EXTRA,
1395                                  x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1396                                  pic_width, pic_height);
1397         src0 = lc->edge_emu_buffer + buf_offset;
1398         src0stride = edge_emu_stride;
1399     }
1400
1401     if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1402         x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1403         y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1404         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1405         int offset     = QPEL_EXTRA_BEFORE * src1stride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1406         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1407
1408         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1409                                  edge_emu_stride, src1stride,
1410                                  block_w + QPEL_EXTRA,
1411                                  block_h + QPEL_EXTRA,
1412                                  x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1413                                  pic_width, pic_height);
1414         src1 = lc->edge_emu_buffer2 + buf_offset;
1415         src1stride = edge_emu_stride;
1416     }
1417
1418     s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
1419                                                 block_h, mx0, my0, block_w);
1420     if (!weight_flag)
1421         s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1422                                                        block_h, mx1, my1, block_w);
1423     else
1424         s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1425                                                          block_h, s->sh.luma_log2_weight_denom,
1426                                                          s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1427                                                          s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1428                                                          s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1429                                                          s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1430                                                          mx1, my1, block_w);
1431
1432 }
1433
1434 /**
1435  * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1436  *
1437  * @param s HEVC decoding context
1438  * @param dst1 target buffer for block data at block position (U plane)
1439  * @param dst2 target buffer for block data at block position (V plane)
1440  * @param dststride stride of the dst1 and dst2 buffers
1441  * @param ref reference picture buffer at origin (0, 0)
1442  * @param mv motion vector (relative to block position) to get pixel data from
1443  * @param x_off horizontal position of block from origin (0, 0)
1444  * @param y_off vertical position of block from origin (0, 0)
1445  * @param block_w width of block
1446  * @param block_h height of block
1447  * @param chroma_weight weighting factor applied to the chroma prediction
1448  * @param chroma_offset additive offset applied to the chroma prediction value
1449  */
1450
1451 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1452                           ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1453                           int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1454 {
1455     HEVCLocalContext *lc = s->HEVClc;
1456     int pic_width        = s->sps->width >> s->sps->hshift[1];
1457     int pic_height       = s->sps->height >> s->sps->vshift[1];
1458     const Mv *mv         = &current_mv->mv[reflist];
1459     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1460                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1461     int idx              = ff_hevc_pel_weight[block_w];
1462     int hshift           = s->sps->hshift[1];
1463     int vshift           = s->sps->vshift[1];
1464     intptr_t mx          = mv->x & ((1 << (2 + hshift)) - 1);
1465     intptr_t my          = mv->y & ((1 << (2 + vshift)) - 1);
1466     intptr_t _mx         = mx << (1 - hshift);
1467     intptr_t _my         = my << (1 - vshift);
1468
1469     x_off += mv->x >> (2 + hshift);
1470     y_off += mv->y >> (2 + vshift);
1471     src0  += y_off * srcstride + x_off * (1 << s->sps->pixel_shift);
1472
1473     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1474         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1475         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1476         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1477         int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->sps->pixel_shift));
1478         int buf_offset0 = EPEL_EXTRA_BEFORE *
1479                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1480         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1481                                  edge_emu_stride, srcstride,
1482                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1483                                  x_off - EPEL_EXTRA_BEFORE,
1484                                  y_off - EPEL_EXTRA_BEFORE,
1485                                  pic_width, pic_height);
1486
1487         src0 = lc->edge_emu_buffer + buf_offset0;
1488         srcstride = edge_emu_stride;
1489     }
1490     if (!weight_flag)
1491         s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1492                                                   block_h, _mx, _my, block_w);
1493     else
1494         s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1495                                                         block_h, s->sh.chroma_log2_weight_denom,
1496                                                         chroma_weight, chroma_offset, _mx, _my, block_w);
1497 }
1498
1499 /**
1500  * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1501  *
1502  * @param s HEVC decoding context
1503  * @param dst target buffer for block data at block position
1504  * @param dststride stride of the dst buffer
1505  * @param ref0 reference picture0 buffer at origin (0, 0)
1506  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1507  * @param x_off horizontal position of block from origin (0, 0)
1508  * @param y_off vertical position of block from origin (0, 0)
1509  * @param block_w width of block
1510  * @param block_h height of block
1511  * @param ref1 reference picture1 buffer at origin (0, 0)
1512  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1513  * @param current_mv current motion vector structure
1514  * @param cidx chroma component(cb, cr)
1515  */
1516 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1517                          int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1518 {
1519     HEVCLocalContext *lc = s->HEVClc;
1520     uint8_t *src1        = ref0->data[cidx+1];
1521     uint8_t *src2        = ref1->data[cidx+1];
1522     ptrdiff_t src1stride = ref0->linesize[cidx+1];
1523     ptrdiff_t src2stride = ref1->linesize[cidx+1];
1524     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1525                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1526     int pic_width        = s->sps->width >> s->sps->hshift[1];
1527     int pic_height       = s->sps->height >> s->sps->vshift[1];
1528     Mv *mv0              = &current_mv->mv[0];
1529     Mv *mv1              = &current_mv->mv[1];
1530     int hshift = s->sps->hshift[1];
1531     int vshift = s->sps->vshift[1];
1532
1533     intptr_t mx0 = mv0->x & ((1 << (2 + hshift)) - 1);
1534     intptr_t my0 = mv0->y & ((1 << (2 + vshift)) - 1);
1535     intptr_t mx1 = mv1->x & ((1 << (2 + hshift)) - 1);
1536     intptr_t my1 = mv1->y & ((1 << (2 + vshift)) - 1);
1537     intptr_t _mx0 = mx0 << (1 - hshift);
1538     intptr_t _my0 = my0 << (1 - vshift);
1539     intptr_t _mx1 = mx1 << (1 - hshift);
1540     intptr_t _my1 = my1 << (1 - vshift);
1541
1542     int x_off0 = x_off + (mv0->x >> (2 + hshift));
1543     int y_off0 = y_off + (mv0->y >> (2 + vshift));
1544     int x_off1 = x_off + (mv1->x >> (2 + hshift));
1545     int y_off1 = y_off + (mv1->y >> (2 + vshift));
1546     int idx = ff_hevc_pel_weight[block_w];
1547     src1  += y_off0 * src1stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
1548     src2  += y_off1 * src2stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
1549
1550     if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1551         x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1552         y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1553         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1554         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1555         int buf_offset1 = EPEL_EXTRA_BEFORE *
1556                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1557
1558         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1559                                  edge_emu_stride, src1stride,
1560                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1561                                  x_off0 - EPEL_EXTRA_BEFORE,
1562                                  y_off0 - EPEL_EXTRA_BEFORE,
1563                                  pic_width, pic_height);
1564
1565         src1 = lc->edge_emu_buffer + buf_offset1;
1566         src1stride = edge_emu_stride;
1567     }
1568
1569     if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1570         x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1571         y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1572         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1573         int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1574         int buf_offset1 = EPEL_EXTRA_BEFORE *
1575                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1576
1577         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1578                                  edge_emu_stride, src2stride,
1579                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1580                                  x_off1 - EPEL_EXTRA_BEFORE,
1581                                  y_off1 - EPEL_EXTRA_BEFORE,
1582                                  pic_width, pic_height);
1583
1584         src2 = lc->edge_emu_buffer2 + buf_offset1;
1585         src2stride = edge_emu_stride;
1586     }
1587
1588     s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1589                                                 block_h, _mx0, _my0, block_w);
1590     if (!weight_flag)
1591         s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1592                                                        src2, src2stride, lc->tmp,
1593                                                        block_h, _mx1, _my1, block_w);
1594     else
1595         s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1596                                                          src2, src2stride, lc->tmp,
1597                                                          block_h,
1598                                                          s->sh.chroma_log2_weight_denom,
1599                                                          s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1600                                                          s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1601                                                          s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1602                                                          s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1603                                                          _mx1, _my1, block_w);
1604 }
1605
1606 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1607                                 const Mv *mv, int y0, int height)
1608 {
1609     int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1610
1611     if (s->threads_type == FF_THREAD_FRAME )
1612         ff_thread_await_progress(&ref->tf, y, 0);
1613 }
1614
1615 static void hevc_luma_mv_mpv_mode(HEVCContext *s, int x0, int y0, int nPbW,
1616                                   int nPbH, int log2_cb_size, int part_idx,
1617                                   int merge_idx, MvField *mv)
1618 {
1619     HEVCLocalContext *lc = s->HEVClc;
1620     enum InterPredIdc inter_pred_idc = PRED_L0;
1621     int mvp_flag;
1622
1623     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1624     mv->pred_flag = 0;
1625     if (s->sh.slice_type == B_SLICE)
1626         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1627
1628     if (inter_pred_idc != PRED_L1) {
1629         if (s->sh.nb_refs[L0])
1630             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1631
1632         mv->pred_flag = PF_L0;
1633         ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1634         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1635         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1636                                  part_idx, merge_idx, mv, mvp_flag, 0);
1637         mv->mv[0].x += lc->pu.mvd.x;
1638         mv->mv[0].y += lc->pu.mvd.y;
1639     }
1640
1641     if (inter_pred_idc != PRED_L0) {
1642         if (s->sh.nb_refs[L1])
1643             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1644
1645         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1646             AV_ZERO32(&lc->pu.mvd);
1647         } else {
1648             ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1649         }
1650
1651         mv->pred_flag += PF_L1;
1652         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1653         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1654                                  part_idx, merge_idx, mv, mvp_flag, 1);
1655         mv->mv[1].x += lc->pu.mvd.x;
1656         mv->mv[1].y += lc->pu.mvd.y;
1657     }
1658 }
1659
1660 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1661                                 int nPbW, int nPbH,
1662                                 int log2_cb_size, int partIdx, int idx)
1663 {
1664 #define POS(c_idx, x, y)                                                              \
1665     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1666                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1667     HEVCLocalContext *lc = s->HEVClc;
1668     int merge_idx = 0;
1669     struct MvField current_mv = {{{ 0 }}};
1670
1671     int min_pu_width = s->sps->min_pu_width;
1672
1673     MvField *tab_mvf = s->ref->tab_mvf;
1674     RefPicList  *refPicList = s->ref->refPicList;
1675     HEVCFrame *ref0 = NULL, *ref1 = NULL;
1676     uint8_t *dst0 = POS(0, x0, y0);
1677     uint8_t *dst1 = POS(1, x0, y0);
1678     uint8_t *dst2 = POS(2, x0, y0);
1679     int log2_min_cb_size = s->sps->log2_min_cb_size;
1680     int min_cb_width     = s->sps->min_cb_width;
1681     int x_cb             = x0 >> log2_min_cb_size;
1682     int y_cb             = y0 >> log2_min_cb_size;
1683     int x_pu, y_pu;
1684     int i, j;
1685
1686     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1687
1688     if (!skip_flag)
1689         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1690
1691     if (skip_flag || lc->pu.merge_flag) {
1692         if (s->sh.max_num_merge_cand > 1)
1693             merge_idx = ff_hevc_merge_idx_decode(s);
1694         else
1695             merge_idx = 0;
1696
1697         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1698                                    partIdx, merge_idx, &current_mv);
1699     } else {
1700         hevc_luma_mv_mpv_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1701                               partIdx, merge_idx, &current_mv);
1702     }
1703
1704     x_pu = x0 >> s->sps->log2_min_pu_size;
1705     y_pu = y0 >> s->sps->log2_min_pu_size;
1706
1707     for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1708         for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1709             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1710
1711     if (current_mv.pred_flag & PF_L0) {
1712         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1713         if (!ref0)
1714             return;
1715         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1716     }
1717     if (current_mv.pred_flag & PF_L1) {
1718         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1719         if (!ref1)
1720             return;
1721         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1722     }
1723
1724     if (current_mv.pred_flag == PF_L0) {
1725         int x0_c = x0 >> s->sps->hshift[1];
1726         int y0_c = y0 >> s->sps->vshift[1];
1727         int nPbW_c = nPbW >> s->sps->hshift[1];
1728         int nPbH_c = nPbH >> s->sps->vshift[1];
1729
1730         luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1731                     &current_mv.mv[0], x0, y0, nPbW, nPbH,
1732                     s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1733                     s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1734
1735         if (s->sps->chroma_format_idc) {
1736             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1737                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1738                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1739             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1740                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1741                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1742         }
1743     } else if (current_mv.pred_flag == PF_L1) {
1744         int x0_c = x0 >> s->sps->hshift[1];
1745         int y0_c = y0 >> s->sps->vshift[1];
1746         int nPbW_c = nPbW >> s->sps->hshift[1];
1747         int nPbH_c = nPbH >> s->sps->vshift[1];
1748
1749         luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1750                     &current_mv.mv[1], x0, y0, nPbW, nPbH,
1751                     s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1752                     s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1753
1754         if (s->sps->chroma_format_idc) {
1755             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1756                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1757                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1758
1759             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1760                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1761                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1762         }
1763     } else if (current_mv.pred_flag == PF_BI) {
1764         int x0_c = x0 >> s->sps->hshift[1];
1765         int y0_c = y0 >> s->sps->vshift[1];
1766         int nPbW_c = nPbW >> s->sps->hshift[1];
1767         int nPbH_c = nPbH >> s->sps->vshift[1];
1768
1769         luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1770                    &current_mv.mv[0], x0, y0, nPbW, nPbH,
1771                    ref1->frame, &current_mv.mv[1], &current_mv);
1772
1773         if (s->sps->chroma_format_idc) {
1774             chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1775                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0);
1776
1777             chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1778                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 1);
1779         }
1780     }
1781 }
1782
1783 /**
1784  * 8.4.1
1785  */
1786 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1787                                 int prev_intra_luma_pred_flag)
1788 {
1789     HEVCLocalContext *lc = s->HEVClc;
1790     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1791     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1792     int min_pu_width     = s->sps->min_pu_width;
1793     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1794     int x0b              = x0 & ((1 << s->sps->log2_ctb_size) - 1);
1795     int y0b              = y0 & ((1 << s->sps->log2_ctb_size) - 1);
1796
1797     int cand_up   = (lc->ctb_up_flag || y0b) ?
1798                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1799     int cand_left = (lc->ctb_left_flag || x0b) ?
1800                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1801
1802     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1803
1804     MvField *tab_mvf = s->ref->tab_mvf;
1805     int intra_pred_mode;
1806     int candidate[3];
1807     int i, j;
1808
1809     // intra_pred_mode prediction does not cross vertical CTB boundaries
1810     if ((y0 - 1) < y_ctb)
1811         cand_up = INTRA_DC;
1812
1813     if (cand_left == cand_up) {
1814         if (cand_left < 2) {
1815             candidate[0] = INTRA_PLANAR;
1816             candidate[1] = INTRA_DC;
1817             candidate[2] = INTRA_ANGULAR_26;
1818         } else {
1819             candidate[0] = cand_left;
1820             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1821             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1822         }
1823     } else {
1824         candidate[0] = cand_left;
1825         candidate[1] = cand_up;
1826         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1827             candidate[2] = INTRA_PLANAR;
1828         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1829             candidate[2] = INTRA_DC;
1830         } else {
1831             candidate[2] = INTRA_ANGULAR_26;
1832         }
1833     }
1834
1835     if (prev_intra_luma_pred_flag) {
1836         intra_pred_mode = candidate[lc->pu.mpm_idx];
1837     } else {
1838         if (candidate[0] > candidate[1])
1839             FFSWAP(uint8_t, candidate[0], candidate[1]);
1840         if (candidate[0] > candidate[2])
1841             FFSWAP(uint8_t, candidate[0], candidate[2]);
1842         if (candidate[1] > candidate[2])
1843             FFSWAP(uint8_t, candidate[1], candidate[2]);
1844
1845         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1846         for (i = 0; i < 3; i++)
1847             if (intra_pred_mode >= candidate[i])
1848                 intra_pred_mode++;
1849     }
1850
1851     /* write the intra prediction units into the mv array */
1852     if (!size_in_pus)
1853         size_in_pus = 1;
1854     for (i = 0; i < size_in_pus; i++) {
1855         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1856                intra_pred_mode, size_in_pus);
1857
1858         for (j = 0; j < size_in_pus; j++) {
1859             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
1860         }
1861     }
1862
1863     return intra_pred_mode;
1864 }
1865
1866 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1867                                           int log2_cb_size, int ct_depth)
1868 {
1869     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1870     int x_cb   = x0 >> s->sps->log2_min_cb_size;
1871     int y_cb   = y0 >> s->sps->log2_min_cb_size;
1872     int y;
1873
1874     for (y = 0; y < length; y++)
1875         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
1876                ct_depth, length);
1877 }
1878
1879 static const uint8_t tab_mode_idx[] = {
1880      0,  1,  2,  2,  2,  2,  3,  5,  7,  8, 10, 12, 13, 15, 17, 18, 19, 20,
1881     21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31};
1882
1883 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1884                                   int log2_cb_size)
1885 {
1886     HEVCLocalContext *lc = s->HEVClc;
1887     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1888     uint8_t prev_intra_luma_pred_flag[4];
1889     int split   = lc->cu.part_mode == PART_NxN;
1890     int pb_size = (1 << log2_cb_size) >> split;
1891     int side    = split + 1;
1892     int chroma_mode;
1893     int i, j;
1894
1895     for (i = 0; i < side; i++)
1896         for (j = 0; j < side; j++)
1897             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1898
1899     for (i = 0; i < side; i++) {
1900         for (j = 0; j < side; j++) {
1901             if (prev_intra_luma_pred_flag[2 * i + j])
1902                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1903             else
1904                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1905
1906             lc->pu.intra_pred_mode[2 * i + j] =
1907                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1908                                      prev_intra_luma_pred_flag[2 * i + j]);
1909         }
1910     }
1911
1912     if (s->sps->chroma_format_idc == 3) {
1913         for (i = 0; i < side; i++) {
1914             for (j = 0; j < side; j++) {
1915                 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1916                 if (chroma_mode != 4) {
1917                     if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
1918                         lc->pu.intra_pred_mode_c[2 * i + j] = 34;
1919                     else
1920                         lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
1921                 } else {
1922                     lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
1923                 }
1924             }
1925         }
1926     } else if (s->sps->chroma_format_idc == 2) {
1927         int mode_idx;
1928         lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1929         if (chroma_mode != 4) {
1930             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1931                 mode_idx = 34;
1932             else
1933                 mode_idx = intra_chroma_table[chroma_mode];
1934         } else {
1935             mode_idx = lc->pu.intra_pred_mode[0];
1936         }
1937         lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
1938     } else if (s->sps->chroma_format_idc != 0) {
1939         chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1940         if (chroma_mode != 4) {
1941             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1942                 lc->pu.intra_pred_mode_c[0] = 34;
1943             else
1944                 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
1945         } else {
1946             lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
1947         }
1948     }
1949 }
1950
1951 static void intra_prediction_unit_default_value(HEVCContext *s,
1952                                                 int x0, int y0,
1953                                                 int log2_cb_size)
1954 {
1955     HEVCLocalContext *lc = s->HEVClc;
1956     int pb_size          = 1 << log2_cb_size;
1957     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
1958     int min_pu_width     = s->sps->min_pu_width;
1959     MvField *tab_mvf     = s->ref->tab_mvf;
1960     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1961     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1962     int j, k;
1963
1964     if (size_in_pus == 0)
1965         size_in_pus = 1;
1966     for (j = 0; j < size_in_pus; j++)
1967         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
1968     if (lc->cu.pred_mode == MODE_INTRA)
1969         for (j = 0; j < size_in_pus; j++)
1970             for (k = 0; k < size_in_pus; k++)
1971                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
1972 }
1973
1974 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
1975 {
1976     int cb_size          = 1 << log2_cb_size;
1977     HEVCLocalContext *lc = s->HEVClc;
1978     int log2_min_cb_size = s->sps->log2_min_cb_size;
1979     int length           = cb_size >> log2_min_cb_size;
1980     int min_cb_width     = s->sps->min_cb_width;
1981     int x_cb             = x0 >> log2_min_cb_size;
1982     int y_cb             = y0 >> log2_min_cb_size;
1983     int idx              = log2_cb_size - 2;
1984     int qp_block_mask    = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
1985     int x, y, ret;
1986
1987     lc->cu.x                = x0;
1988     lc->cu.y                = y0;
1989     lc->cu.pred_mode        = MODE_INTRA;
1990     lc->cu.part_mode        = PART_2Nx2N;
1991     lc->cu.intra_split_flag = 0;
1992
1993     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
1994     for (x = 0; x < 4; x++)
1995         lc->pu.intra_pred_mode[x] = 1;
1996     if (s->pps->transquant_bypass_enable_flag) {
1997         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
1998         if (lc->cu.cu_transquant_bypass_flag)
1999             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2000     } else
2001         lc->cu.cu_transquant_bypass_flag = 0;
2002
2003     if (s->sh.slice_type != I_SLICE) {
2004         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2005
2006         x = y_cb * min_cb_width + x_cb;
2007         for (y = 0; y < length; y++) {
2008             memset(&s->skip_flag[x], skip_flag, length);
2009             x += min_cb_width;
2010         }
2011         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2012     } else {
2013         x = y_cb * min_cb_width + x_cb;
2014         for (y = 0; y < length; y++) {
2015             memset(&s->skip_flag[x], 0, length);
2016             x += min_cb_width;
2017         }
2018     }
2019
2020     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2021         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2022         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2023
2024         if (!s->sh.disable_deblocking_filter_flag)
2025             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2026     } else {
2027         int pcm_flag = 0;
2028
2029         if (s->sh.slice_type != I_SLICE)
2030             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2031         if (lc->cu.pred_mode != MODE_INTRA ||
2032             log2_cb_size == s->sps->log2_min_cb_size) {
2033             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2034             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2035                                       lc->cu.pred_mode == MODE_INTRA;
2036         }
2037
2038         if (lc->cu.pred_mode == MODE_INTRA) {
2039             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2040                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2041                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2042                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2043             }
2044             if (pcm_flag) {
2045                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2046                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2047                 if (s->sps->pcm.loop_filter_disable_flag)
2048                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2049
2050                 if (ret < 0)
2051                     return ret;
2052             } else {
2053                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2054             }
2055         } else {
2056             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2057             switch (lc->cu.part_mode) {
2058             case PART_2Nx2N:
2059                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2060                 break;
2061             case PART_2NxN:
2062                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0, idx);
2063                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2064                 break;
2065             case PART_Nx2N:
2066                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2067                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2068                 break;
2069             case PART_2NxnU:
2070                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0, idx);
2071                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2072                 break;
2073             case PART_2NxnD:
2074                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2075                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1, idx);
2076                 break;
2077             case PART_nLx2N:
2078                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0, idx - 2);
2079                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2080                 break;
2081             case PART_nRx2N:
2082                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2083                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1, idx - 2);
2084                 break;
2085             case PART_NxN:
2086                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2087                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2088                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2089                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
2090                 break;
2091             }
2092         }
2093
2094         if (!pcm_flag) {
2095             int rqt_root_cbf = 1;
2096
2097             if (lc->cu.pred_mode != MODE_INTRA &&
2098                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2099                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2100             }
2101             if (rqt_root_cbf) {
2102                 const static int cbf[2] = { 0 };
2103                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2104                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2105                                          s->sps->max_transform_hierarchy_depth_inter;
2106                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2107                                          log2_cb_size,
2108                                          log2_cb_size, 0, 0, cbf, cbf);
2109                 if (ret < 0)
2110                     return ret;
2111             } else {
2112                 if (!s->sh.disable_deblocking_filter_flag)
2113                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2114             }
2115         }
2116     }
2117
2118     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2119         ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
2120
2121     x = y_cb * min_cb_width + x_cb;
2122     for (y = 0; y < length; y++) {
2123         memset(&s->qp_y_tab[x], lc->qp_y, length);
2124         x += min_cb_width;
2125     }
2126
2127     if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2128        ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2129         lc->qPy_pred = lc->qp_y;
2130     }
2131
2132     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
2133
2134     return 0;
2135 }
2136
2137 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2138                                int log2_cb_size, int cb_depth)
2139 {
2140     HEVCLocalContext *lc = s->HEVClc;
2141     const int cb_size    = 1 << log2_cb_size;
2142     int ret;
2143     int split_cu;
2144
2145     lc->ct_depth = cb_depth;
2146     if (x0 + cb_size <= s->sps->width  &&
2147         y0 + cb_size <= s->sps->height &&
2148         log2_cb_size > s->sps->log2_min_cb_size) {
2149         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2150     } else {
2151         split_cu = (log2_cb_size > s->sps->log2_min_cb_size);
2152     }
2153     if (s->pps->cu_qp_delta_enabled_flag &&
2154         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2155         lc->tu.is_cu_qp_delta_coded = 0;
2156         lc->tu.cu_qp_delta          = 0;
2157     }
2158
2159     if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2160         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_chroma_qp_offset_depth) {
2161         lc->tu.is_cu_chroma_qp_offset_coded = 0;
2162     }
2163
2164     if (split_cu) {
2165         int qp_block_mask = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
2166         const int cb_size_split = cb_size >> 1;
2167         const int x1 = x0 + cb_size_split;
2168         const int y1 = y0 + cb_size_split;
2169
2170         int more_data = 0;
2171
2172         more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2173         if (more_data < 0)
2174             return more_data;
2175
2176         if (more_data && x1 < s->sps->width) {
2177             more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2178             if (more_data < 0)
2179                 return more_data;
2180         }
2181         if (more_data && y1 < s->sps->height) {
2182             more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2183             if (more_data < 0)
2184                 return more_data;
2185         }
2186         if (more_data && x1 < s->sps->width &&
2187             y1 < s->sps->height) {
2188             more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
2189             if (more_data < 0)
2190                 return more_data;
2191         }
2192
2193         if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2194             ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2195             lc->qPy_pred = lc->qp_y;
2196
2197         if (more_data)
2198             return ((x1 + cb_size_split) < s->sps->width ||
2199                     (y1 + cb_size_split) < s->sps->height);
2200         else
2201             return 0;
2202     } else {
2203         ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2204         if (ret < 0)
2205             return ret;
2206         if ((!((x0 + cb_size) %
2207                (1 << (s->sps->log2_ctb_size))) ||
2208              (x0 + cb_size >= s->sps->width)) &&
2209             (!((y0 + cb_size) %
2210                (1 << (s->sps->log2_ctb_size))) ||
2211              (y0 + cb_size >= s->sps->height))) {
2212             int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2213             return !end_of_slice_flag;
2214         } else {
2215             return 1;
2216         }
2217     }
2218
2219     return 0;
2220 }
2221
2222 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2223                                  int ctb_addr_ts)
2224 {
2225     HEVCLocalContext *lc  = s->HEVClc;
2226     int ctb_size          = 1 << s->sps->log2_ctb_size;
2227     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2228     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2229
2230     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2231
2232     if (s->pps->entropy_coding_sync_enabled_flag) {
2233         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2234             lc->first_qp_group = 1;
2235         lc->end_of_tiles_x = s->sps->width;
2236     } else if (s->pps->tiles_enabled_flag) {
2237         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2238             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2239             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2240             lc->first_qp_group   = 1;
2241         }
2242     } else {
2243         lc->end_of_tiles_x = s->sps->width;
2244     }
2245
2246     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2247
2248     lc->boundary_flags = 0;
2249     if (s->pps->tiles_enabled_flag) {
2250         if (x_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2251             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2252         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2253             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2254         if (y_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]])
2255             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2256         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width])
2257             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2258     } else {
2259         if (!ctb_addr_in_slice > 0)
2260             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2261         if (ctb_addr_in_slice < s->sps->ctb_width)
2262             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2263     }
2264
2265     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2266     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2267     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2268     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2269 }
2270
2271 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2272 {
2273     HEVCContext *s  = avctxt->priv_data;
2274     int ctb_size    = 1 << s->sps->log2_ctb_size;
2275     int more_data   = 1;
2276     int x_ctb       = 0;
2277     int y_ctb       = 0;
2278     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2279
2280     if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2281         av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2282         return AVERROR_INVALIDDATA;
2283     }
2284
2285     if (s->sh.dependent_slice_segment_flag) {
2286         int prev_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2287         if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2288             av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2289             return AVERROR_INVALIDDATA;
2290         }
2291     }
2292
2293     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2294         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2295
2296         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2297         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2298         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2299
2300         ff_hevc_cabac_init(s, ctb_addr_ts);
2301
2302         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2303
2304         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2305         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2306         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2307
2308         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2309         if (more_data < 0) {
2310             s->tab_slice_address[ctb_addr_rs] = -1;
2311             return more_data;
2312         }
2313
2314
2315         ctb_addr_ts++;
2316         ff_hevc_save_states(s, ctb_addr_ts);
2317         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2318     }
2319
2320     if (x_ctb + ctb_size >= s->sps->width &&
2321         y_ctb + ctb_size >= s->sps->height)
2322         ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2323
2324     return ctb_addr_ts;
2325 }
2326
2327 static int hls_slice_data(HEVCContext *s)
2328 {
2329     int arg[2];
2330     int ret[2];
2331
2332     arg[0] = 0;
2333     arg[1] = 1;
2334
2335     s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
2336     return ret[0];
2337 }
2338 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2339 {
2340     HEVCContext *s1  = avctxt->priv_data, *s;
2341     HEVCLocalContext *lc;
2342     int ctb_size    = 1<< s1->sps->log2_ctb_size;
2343     int more_data   = 1;
2344     int *ctb_row_p    = input_ctb_row;
2345     int ctb_row = ctb_row_p[job];
2346     int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->sps->width + ctb_size - 1) >> s1->sps->log2_ctb_size);
2347     int ctb_addr_ts = s1->pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2348     int thread = ctb_row % s1->threads_number;
2349     int ret;
2350
2351     s = s1->sList[self_id];
2352     lc = s->HEVClc;
2353
2354     if(ctb_row) {
2355         ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2356
2357         if (ret < 0)
2358             return ret;
2359         ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2360     }
2361
2362     while(more_data && ctb_addr_ts < s->sps->ctb_size) {
2363         int x_ctb = (ctb_addr_rs % s->sps->ctb_width) << s->sps->log2_ctb_size;
2364         int y_ctb = (ctb_addr_rs / s->sps->ctb_width) << s->sps->log2_ctb_size;
2365
2366         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2367
2368         ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
2369
2370         if (avpriv_atomic_int_get(&s1->wpp_err)){
2371             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2372             return 0;
2373         }
2374
2375         ff_hevc_cabac_init(s, ctb_addr_ts);
2376         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2377         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2378
2379         if (more_data < 0) {
2380             s->tab_slice_address[ctb_addr_rs] = -1;
2381             return more_data;
2382         }
2383
2384         ctb_addr_ts++;
2385
2386         ff_hevc_save_states(s, ctb_addr_ts);
2387         ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2388         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2389
2390         if (!more_data && (x_ctb+ctb_size) < s->sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2391             avpriv_atomic_int_set(&s1->wpp_err,  1);
2392             ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2393             return 0;
2394         }
2395
2396         if ((x_ctb+ctb_size) >= s->sps->width && (y_ctb+ctb_size) >= s->sps->height ) {
2397             ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2398             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2399             return ctb_addr_ts;
2400         }
2401         ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2402         x_ctb+=ctb_size;
2403
2404         if(x_ctb >= s->sps->width) {
2405             break;
2406         }
2407     }
2408     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2409
2410     return 0;
2411 }
2412
2413 static int hls_slice_data_wpp(HEVCContext *s, const uint8_t *nal, int length)
2414 {
2415     HEVCLocalContext *lc = s->HEVClc;
2416     int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2417     int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2418     int offset;
2419     int startheader, cmpt = 0;
2420     int i, j, res = 0;
2421
2422     if (!ret || !arg) {
2423         av_free(ret);
2424         av_free(arg);
2425         return AVERROR(ENOMEM);
2426     }
2427
2428
2429     if (!s->sList[1]) {
2430         ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
2431
2432
2433         for (i = 1; i < s->threads_number; i++) {
2434             s->sList[i] = av_malloc(sizeof(HEVCContext));
2435             memcpy(s->sList[i], s, sizeof(HEVCContext));
2436             s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2437             s->sList[i]->HEVClc = s->HEVClcList[i];
2438         }
2439     }
2440
2441     offset = (lc->gb.index >> 3);
2442
2443     for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < s->skipped_bytes; j++) {
2444         if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2445             startheader--;
2446             cmpt++;
2447         }
2448     }
2449
2450     for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2451         offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2452         for (j = 0, cmpt = 0, startheader = offset
2453              + s->sh.entry_point_offset[i]; j < s->skipped_bytes; j++) {
2454             if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2455                 startheader--;
2456                 cmpt++;
2457             }
2458         }
2459         s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2460         s->sh.offset[i - 1] = offset;
2461
2462     }
2463     if (s->sh.num_entry_point_offsets != 0) {
2464         offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2465         s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2466         s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
2467
2468     }
2469     s->data = nal;
2470
2471     for (i = 1; i < s->threads_number; i++) {
2472         s->sList[i]->HEVClc->first_qp_group = 1;
2473         s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2474         memcpy(s->sList[i], s, sizeof(HEVCContext));
2475         s->sList[i]->HEVClc = s->HEVClcList[i];
2476     }
2477
2478     avpriv_atomic_int_set(&s->wpp_err, 0);
2479     ff_reset_entries(s->avctx);
2480
2481     for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2482         arg[i] = i;
2483         ret[i] = 0;
2484     }
2485
2486     if (s->pps->entropy_coding_sync_enabled_flag)
2487         s->avctx->execute2(s->avctx, (void *) hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2488
2489     for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
2490         res += ret[i];
2491     av_free(ret);
2492     av_free(arg);
2493     return res;
2494 }
2495
2496 /**
2497  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2498  * 0 if the unit should be skipped, 1 otherwise
2499  */
2500 static int hls_nal_unit(HEVCContext *s)
2501 {
2502     GetBitContext *gb = &s->HEVClc->gb;
2503     int nuh_layer_id;
2504
2505     if (get_bits1(gb) != 0)
2506         return AVERROR_INVALIDDATA;
2507
2508     s->nal_unit_type = get_bits(gb, 6);
2509
2510     nuh_layer_id   = get_bits(gb, 6);
2511     s->temporal_id = get_bits(gb, 3) - 1;
2512     if (s->temporal_id < 0)
2513         return AVERROR_INVALIDDATA;
2514
2515     av_log(s->avctx, AV_LOG_DEBUG,
2516            "nal_unit_type: %d, nuh_layer_id: %d, temporal_id: %d\n",
2517            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2518
2519     return nuh_layer_id == 0;
2520 }
2521
2522 static int set_side_data(HEVCContext *s)
2523 {
2524     AVFrame *out = s->ref->frame;
2525
2526     if (s->sei_frame_packing_present &&
2527         s->frame_packing_arrangement_type >= 3 &&
2528         s->frame_packing_arrangement_type <= 5 &&
2529         s->content_interpretation_type > 0 &&
2530         s->content_interpretation_type < 3) {
2531         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2532         if (!stereo)
2533             return AVERROR(ENOMEM);
2534
2535         switch (s->frame_packing_arrangement_type) {
2536         case 3:
2537             if (s->quincunx_subsampling)
2538                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2539             else
2540                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2541             break;
2542         case 4:
2543             stereo->type = AV_STEREO3D_TOPBOTTOM;
2544             break;
2545         case 5:
2546             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2547             break;
2548         }
2549
2550         if (s->content_interpretation_type == 2)
2551             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2552     }
2553
2554     if (s->sei_display_orientation_present &&
2555         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2556         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2557         AVFrameSideData *rotation = av_frame_new_side_data(out,
2558                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2559                                                            sizeof(int32_t) * 9);
2560         if (!rotation)
2561             return AVERROR(ENOMEM);
2562
2563         av_display_rotation_set((int32_t *)rotation->data, angle);
2564         av_display_matrix_flip((int32_t *)rotation->data,
2565                                s->sei_hflip, s->sei_vflip);
2566     }
2567
2568     return 0;
2569 }
2570
2571 static int hevc_frame_start(HEVCContext *s)
2572 {
2573     HEVCLocalContext *lc = s->HEVClc;
2574     int pic_size_in_ctb  = ((s->sps->width  >> s->sps->log2_min_cb_size) + 1) *
2575                            ((s->sps->height >> s->sps->log2_min_cb_size) + 1);
2576     int ret;
2577
2578     memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2579     memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
2580     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2581     memset(s->is_pcm,        0, (s->sps->min_pu_width + 1) * (s->sps->min_pu_height + 1));
2582     memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2583
2584     s->is_decoded        = 0;
2585     s->first_nal_type    = s->nal_unit_type;
2586
2587     if (s->pps->tiles_enabled_flag)
2588         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2589
2590     ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2591     if (ret < 0)
2592         goto fail;
2593
2594     ret = ff_hevc_frame_rps(s);
2595     if (ret < 0) {
2596         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2597         goto fail;
2598     }
2599
2600     s->ref->frame->key_frame = IS_IRAP(s);
2601
2602     ret = set_side_data(s);
2603     if (ret < 0)
2604         goto fail;
2605
2606     s->frame->pict_type = 3 - s->sh.slice_type;
2607
2608     if (!IS_IRAP(s))
2609         ff_hevc_bump_frame(s);
2610
2611     av_frame_unref(s->output_frame);
2612     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2613     if (ret < 0)
2614         goto fail;
2615
2616     if (!s->avctx->hwaccel)
2617         ff_thread_finish_setup(s->avctx);
2618
2619     return 0;
2620
2621 fail:
2622     if (s->ref)
2623         ff_hevc_unref_frame(s, s->ref, ~0);
2624     s->ref = NULL;
2625     return ret;
2626 }
2627
2628 static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
2629 {
2630     HEVCLocalContext *lc = s->HEVClc;
2631     GetBitContext *gb    = &lc->gb;
2632     int ctb_addr_ts, ret;
2633
2634     ret = init_get_bits8(gb, nal->data, nal->size);
2635     if (ret < 0)
2636         return ret;
2637
2638     ret = hls_nal_unit(s);
2639     if (ret < 0) {
2640         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2641                s->nal_unit_type);
2642         goto fail;
2643     } else if (!ret)
2644         return 0;
2645
2646     switch (s->nal_unit_type) {
2647     case NAL_VPS:
2648         ret = ff_hevc_decode_nal_vps(s);
2649         if (ret < 0)
2650             goto fail;
2651         break;
2652     case NAL_SPS:
2653         ret = ff_hevc_decode_nal_sps(s);
2654         if (ret < 0)
2655             goto fail;
2656         break;
2657     case NAL_PPS:
2658         ret = ff_hevc_decode_nal_pps(s);
2659         if (ret < 0)
2660             goto fail;
2661         break;
2662     case NAL_SEI_PREFIX:
2663     case NAL_SEI_SUFFIX:
2664         ret = ff_hevc_decode_nal_sei(s);
2665         if (ret < 0)
2666             goto fail;
2667         break;
2668     case NAL_TRAIL_R:
2669     case NAL_TRAIL_N:
2670     case NAL_TSA_N:
2671     case NAL_TSA_R:
2672     case NAL_STSA_N:
2673     case NAL_STSA_R:
2674     case NAL_BLA_W_LP:
2675     case NAL_BLA_W_RADL:
2676     case NAL_BLA_N_LP:
2677     case NAL_IDR_W_RADL:
2678     case NAL_IDR_N_LP:
2679     case NAL_CRA_NUT:
2680     case NAL_RADL_N:
2681     case NAL_RADL_R:
2682     case NAL_RASL_N:
2683     case NAL_RASL_R:
2684         ret = hls_slice_header(s);
2685         if (ret < 0)
2686             return ret;
2687
2688         if (s->max_ra == INT_MAX) {
2689             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2690                 s->max_ra = s->poc;
2691             } else {
2692                 if (IS_IDR(s))
2693                     s->max_ra = INT_MIN;
2694             }
2695         }
2696
2697         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2698             s->poc <= s->max_ra) {
2699             s->is_decoded = 0;
2700             break;
2701         } else {
2702             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2703                 s->max_ra = INT_MIN;
2704         }
2705
2706         if (s->sh.first_slice_in_pic_flag) {
2707             ret = hevc_frame_start(s);
2708             if (ret < 0)
2709                 return ret;
2710         } else if (!s->ref) {
2711             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2712             goto fail;
2713         }
2714
2715         if (s->nal_unit_type != s->first_nal_type) {
2716             av_log(s->avctx, AV_LOG_ERROR,
2717                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2718                    s->first_nal_type, s->nal_unit_type);
2719             return AVERROR_INVALIDDATA;
2720         }
2721
2722         if (!s->sh.dependent_slice_segment_flag &&
2723             s->sh.slice_type != I_SLICE) {
2724             ret = ff_hevc_slice_rpl(s);
2725             if (ret < 0) {
2726                 av_log(s->avctx, AV_LOG_WARNING,
2727                        "Error constructing the reference lists for the current slice.\n");
2728                 goto fail;
2729             }
2730         }
2731
2732         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2733             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2734             if (ret < 0)
2735                 goto fail;
2736         }
2737
2738         if (s->avctx->hwaccel) {
2739             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
2740             if (ret < 0)
2741                 goto fail;
2742         } else {
2743             if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2744                 ctb_addr_ts = hls_slice_data_wpp(s, nal->data, nal->size);
2745             else
2746                 ctb_addr_ts = hls_slice_data(s);
2747             if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2748                 s->is_decoded = 1;
2749             }
2750
2751             if (ctb_addr_ts < 0) {
2752                 ret = ctb_addr_ts;
2753                 goto fail;
2754             }
2755         }
2756         break;
2757     case NAL_EOS_NUT:
2758     case NAL_EOB_NUT:
2759         s->seq_decode = (s->seq_decode + 1) & 0xff;
2760         s->max_ra     = INT_MAX;
2761         break;
2762     case NAL_AUD:
2763     case NAL_FD_NUT:
2764         break;
2765     default:
2766         av_log(s->avctx, AV_LOG_INFO,
2767                "Skipping NAL unit %d\n", s->nal_unit_type);
2768     }
2769
2770     return 0;
2771 fail:
2772     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2773         return ret;
2774     return 0;
2775 }
2776
2777 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2778  * between these functions would be nice. */
2779 int ff_hevc_extract_rbsp(HEVCContext *s, const uint8_t *src, int length,
2780                          HEVCNAL *nal)
2781 {
2782     int i, si, di;
2783     uint8_t *dst;
2784
2785     s->skipped_bytes = 0;
2786 #define STARTCODE_TEST                                                  \
2787         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
2788             if (src[i + 2] != 3) {                                      \
2789                 /* startcode, so we must be past the end */             \
2790                 length = i;                                             \
2791             }                                                           \
2792             break;                                                      \
2793         }
2794 #if HAVE_FAST_UNALIGNED
2795 #define FIND_FIRST_ZERO                                                 \
2796         if (i > 0 && !src[i])                                           \
2797             i--;                                                        \
2798         while (src[i])                                                  \
2799             i++
2800 #if HAVE_FAST_64BIT
2801     for (i = 0; i + 1 < length; i += 9) {
2802         if (!((~AV_RN64A(src + i) &
2803                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2804               0x8000800080008080ULL))
2805             continue;
2806         FIND_FIRST_ZERO;
2807         STARTCODE_TEST;
2808         i -= 7;
2809     }
2810 #else
2811     for (i = 0; i + 1 < length; i += 5) {
2812         if (!((~AV_RN32A(src + i) &
2813                (AV_RN32A(src + i) - 0x01000101U)) &
2814               0x80008080U))
2815             continue;
2816         FIND_FIRST_ZERO;
2817         STARTCODE_TEST;
2818         i -= 3;
2819     }
2820 #endif /* HAVE_FAST_64BIT */
2821 #else
2822     for (i = 0; i + 1 < length; i += 2) {
2823         if (src[i])
2824             continue;
2825         if (i > 0 && src[i - 1] == 0)
2826             i--;
2827         STARTCODE_TEST;
2828     }
2829 #endif /* HAVE_FAST_UNALIGNED */
2830
2831     if (i >= length - 1) { // no escaped 0
2832         nal->data     =
2833         nal->raw_data = src;
2834         nal->size     =
2835         nal->raw_size = length;
2836         return length;
2837     }
2838
2839     av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2840                    length + FF_INPUT_BUFFER_PADDING_SIZE);
2841     if (!nal->rbsp_buffer)
2842         return AVERROR(ENOMEM);
2843
2844     dst = nal->rbsp_buffer;
2845
2846     memcpy(dst, src, i);
2847     si = di = i;
2848     while (si + 2 < length) {
2849         // remove escapes (very rare 1:2^22)
2850         if (src[si + 2] > 3) {
2851             dst[di++] = src[si++];
2852             dst[di++] = src[si++];
2853         } else if (src[si] == 0 && src[si + 1] == 0) {
2854             if (src[si + 2] == 3) { // escape
2855                 dst[di++] = 0;
2856                 dst[di++] = 0;
2857                 si       += 3;
2858
2859                 s->skipped_bytes++;
2860                 if (s->skipped_bytes_pos_size < s->skipped_bytes) {
2861                     s->skipped_bytes_pos_size *= 2;
2862                     av_reallocp_array(&s->skipped_bytes_pos,
2863                             s->skipped_bytes_pos_size,
2864                             sizeof(*s->skipped_bytes_pos));
2865                     if (!s->skipped_bytes_pos)
2866                         return AVERROR(ENOMEM);
2867                 }
2868                 if (s->skipped_bytes_pos)
2869                     s->skipped_bytes_pos[s->skipped_bytes-1] = di - 1;
2870                 continue;
2871             } else // next start code
2872                 goto nsc;
2873         }
2874
2875         dst[di++] = src[si++];
2876     }
2877     while (si < length)
2878         dst[di++] = src[si++];
2879
2880 nsc:
2881     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2882
2883     nal->data = dst;
2884     nal->size = di;
2885     nal->raw_data = src;
2886     nal->raw_size = si;
2887     return si;
2888 }
2889
2890 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2891 {
2892     int i, consumed, ret = 0;
2893
2894     s->ref = NULL;
2895     s->last_eos = s->eos;
2896     s->eos = 0;
2897
2898     /* split the input packet into NAL units, so we know the upper bound on the
2899      * number of slices in the frame */
2900     s->nb_nals = 0;
2901     while (length >= 4) {
2902         HEVCNAL *nal;
2903         int extract_length = 0;
2904
2905         if (s->is_nalff) {
2906             int i;
2907             for (i = 0; i < s->nal_length_size; i++)
2908                 extract_length = (extract_length << 8) | buf[i];
2909             buf    += s->nal_length_size;
2910             length -= s->nal_length_size;
2911
2912             if (extract_length > length) {
2913                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2914                 ret = AVERROR_INVALIDDATA;
2915                 goto fail;
2916             }
2917         } else {
2918             /* search start code */
2919             while (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2920                 ++buf;
2921                 --length;
2922                 if (length < 4) {
2923                     av_log(s->avctx, AV_LOG_ERROR, "No start code is found.\n");
2924                     ret = AVERROR_INVALIDDATA;
2925                     goto fail;
2926                 }
2927             }
2928
2929             buf           += 3;
2930             length        -= 3;
2931         }
2932
2933         if (!s->is_nalff)
2934             extract_length = length;
2935
2936         if (s->nals_allocated < s->nb_nals + 1) {
2937             int new_size = s->nals_allocated + 1;
2938             void *tmp = av_realloc_array(s->nals, new_size, sizeof(*s->nals));
2939             ret = AVERROR(ENOMEM);
2940             if (!tmp) {
2941                 goto fail;
2942             }
2943             s->nals = tmp;
2944             memset(s->nals + s->nals_allocated, 0,
2945                    (new_size - s->nals_allocated) * sizeof(*s->nals));
2946
2947             tmp = av_realloc_array(s->skipped_bytes_nal, new_size, sizeof(*s->skipped_bytes_nal));
2948             if (!tmp)
2949                 goto fail;
2950             s->skipped_bytes_nal = tmp;
2951
2952             tmp = av_realloc_array(s->skipped_bytes_pos_size_nal, new_size, sizeof(*s->skipped_bytes_pos_size_nal));
2953             if (!tmp)
2954                 goto fail;
2955             s->skipped_bytes_pos_size_nal = tmp;
2956
2957             tmp = av_realloc_array(s->skipped_bytes_pos_nal, new_size, sizeof(*s->skipped_bytes_pos_nal));
2958             if (!tmp)
2959                 goto fail;
2960             s->skipped_bytes_pos_nal = tmp;
2961
2962             s->skipped_bytes_pos_size_nal[s->nals_allocated] = 1024; // initial buffer size
2963             s->skipped_bytes_pos_nal[s->nals_allocated] = av_malloc_array(s->skipped_bytes_pos_size_nal[s->nals_allocated], sizeof(*s->skipped_bytes_pos));
2964             if (!s->skipped_bytes_pos_nal[s->nals_allocated])
2965                 goto fail;
2966             s->nals_allocated = new_size;
2967         }
2968         s->skipped_bytes_pos_size = s->skipped_bytes_pos_size_nal[s->nb_nals];
2969         s->skipped_bytes_pos = s->skipped_bytes_pos_nal[s->nb_nals];
2970         nal = &s->nals[s->nb_nals];
2971
2972         consumed = ff_hevc_extract_rbsp(s, buf, extract_length, nal);
2973
2974         s->skipped_bytes_nal[s->nb_nals] = s->skipped_bytes;
2975         s->skipped_bytes_pos_size_nal[s->nb_nals] = s->skipped_bytes_pos_size;
2976         s->skipped_bytes_pos_nal[s->nb_nals++] = s->skipped_bytes_pos;
2977
2978
2979         if (consumed < 0) {
2980             ret = consumed;
2981             goto fail;
2982         }
2983
2984         ret = init_get_bits8(&s->HEVClc->gb, nal->data, nal->size);
2985         if (ret < 0)
2986             goto fail;
2987         hls_nal_unit(s);
2988
2989         if (s->nal_unit_type == NAL_EOB_NUT ||
2990             s->nal_unit_type == NAL_EOS_NUT)
2991             s->eos = 1;
2992
2993         buf    += consumed;
2994         length -= consumed;
2995     }
2996
2997     /* parse the NAL units */
2998     for (i = 0; i < s->nb_nals; i++) {
2999         int ret;
3000         s->skipped_bytes = s->skipped_bytes_nal[i];
3001         s->skipped_bytes_pos = s->skipped_bytes_pos_nal[i];
3002
3003         ret = decode_nal_unit(s, &s->nals[i]);
3004         if (ret < 0) {
3005             av_log(s->avctx, AV_LOG_WARNING,
3006                    "Error parsing NAL unit #%d.\n", i);
3007             goto fail;
3008         }
3009     }
3010
3011 fail:
3012     if (s->ref && s->threads_type == FF_THREAD_FRAME)
3013         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
3014
3015     return ret;
3016 }
3017
3018 static void print_md5(void *log_ctx, int level, uint8_t md5[16])
3019 {
3020     int i;
3021     for (i = 0; i < 16; i++)
3022         av_log(log_ctx, level, "%02"PRIx8, md5[i]);
3023 }
3024
3025 static int verify_md5(HEVCContext *s, AVFrame *frame)
3026 {
3027     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
3028     int pixel_shift;
3029     int i, j;
3030
3031     if (!desc)
3032         return AVERROR(EINVAL);
3033
3034     pixel_shift = desc->comp[0].depth_minus1 > 7;
3035
3036     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
3037            s->poc);
3038
3039     /* the checksums are LE, so we have to byteswap for >8bpp formats
3040      * on BE arches */
3041 #if HAVE_BIGENDIAN
3042     if (pixel_shift && !s->checksum_buf) {
3043         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
3044                        FFMAX3(frame->linesize[0], frame->linesize[1],
3045                               frame->linesize[2]));
3046         if (!s->checksum_buf)
3047             return AVERROR(ENOMEM);
3048     }
3049 #endif
3050
3051     for (i = 0; frame->data[i]; i++) {
3052         int width  = s->avctx->coded_width;
3053         int height = s->avctx->coded_height;
3054         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
3055         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
3056         uint8_t md5[16];
3057
3058         av_md5_init(s->md5_ctx);
3059         for (j = 0; j < h; j++) {
3060             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
3061 #if HAVE_BIGENDIAN
3062             if (pixel_shift) {
3063                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
3064                                     (const uint16_t *) src, w);
3065                 src = s->checksum_buf;
3066             }
3067 #endif
3068             av_md5_update(s->md5_ctx, src, w << pixel_shift);
3069         }
3070         av_md5_final(s->md5_ctx, md5);
3071
3072         if (!memcmp(md5, s->md5[i], 16)) {
3073             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
3074             print_md5(s->avctx, AV_LOG_DEBUG, md5);
3075             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
3076         } else {
3077             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
3078             print_md5(s->avctx, AV_LOG_ERROR, md5);
3079             av_log   (s->avctx, AV_LOG_ERROR, " != ");
3080             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
3081             av_log   (s->avctx, AV_LOG_ERROR, "\n");
3082             return AVERROR_INVALIDDATA;
3083         }
3084     }
3085
3086     av_log(s->avctx, AV_LOG_DEBUG, "\n");
3087
3088     return 0;
3089 }
3090
3091 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
3092                              AVPacket *avpkt)
3093 {
3094     int ret;
3095     HEVCContext *s = avctx->priv_data;
3096
3097     if (!avpkt->size) {
3098         ret = ff_hevc_output_frame(s, data, 1);
3099         if (ret < 0)
3100             return ret;
3101
3102         *got_output = ret;
3103         return 0;
3104     }
3105
3106     s->ref = NULL;
3107     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
3108     if (ret < 0)
3109         return ret;
3110
3111     if (avctx->hwaccel) {
3112         if (s->ref && avctx->hwaccel->end_frame(avctx) < 0)
3113             av_log(avctx, AV_LOG_ERROR,
3114                    "hardware accelerator failed to decode picture\n");
3115     } else {
3116         /* verify the SEI checksum */
3117         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
3118             s->is_md5) {
3119             ret = verify_md5(s, s->ref->frame);
3120             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
3121                 ff_hevc_unref_frame(s, s->ref, ~0);
3122                 return ret;
3123             }
3124         }
3125     }
3126     s->is_md5 = 0;
3127
3128     if (s->is_decoded) {
3129         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
3130         s->is_decoded = 0;
3131     }
3132
3133     if (s->output_frame->buf[0]) {
3134         av_frame_move_ref(data, s->output_frame);
3135         *got_output = 1;
3136     }
3137
3138     return avpkt->size;
3139 }
3140
3141 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
3142 {
3143     int ret;
3144
3145     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
3146     if (ret < 0)
3147         return ret;
3148
3149     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
3150     if (!dst->tab_mvf_buf)
3151         goto fail;
3152     dst->tab_mvf = src->tab_mvf;
3153
3154     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
3155     if (!dst->rpl_tab_buf)
3156         goto fail;
3157     dst->rpl_tab = src->rpl_tab;
3158
3159     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
3160     if (!dst->rpl_buf)
3161         goto fail;
3162
3163     dst->poc        = src->poc;
3164     dst->ctb_count  = src->ctb_count;
3165     dst->window     = src->window;
3166     dst->flags      = src->flags;
3167     dst->sequence   = src->sequence;
3168
3169     if (src->hwaccel_picture_private) {
3170         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
3171         if (!dst->hwaccel_priv_buf)
3172             goto fail;
3173         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
3174     }
3175
3176     return 0;
3177 fail:
3178     ff_hevc_unref_frame(s, dst, ~0);
3179     return AVERROR(ENOMEM);
3180 }
3181
3182 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3183 {
3184     HEVCContext       *s = avctx->priv_data;
3185     int i;
3186
3187     pic_arrays_free(s);
3188
3189     av_freep(&s->md5_ctx);
3190
3191     for(i=0; i < s->nals_allocated; i++) {
3192         av_freep(&s->skipped_bytes_pos_nal[i]);
3193     }
3194     av_freep(&s->skipped_bytes_pos_size_nal);
3195     av_freep(&s->skipped_bytes_nal);
3196     av_freep(&s->skipped_bytes_pos_nal);
3197
3198     av_freep(&s->cabac_state);
3199
3200     for (i = 0; i < 3; i++) {
3201         av_freep(&s->sao_pixel_buffer_h[i]);
3202         av_freep(&s->sao_pixel_buffer_v[i]);
3203     }
3204     av_frame_free(&s->output_frame);
3205
3206     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3207         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3208         av_frame_free(&s->DPB[i].frame);
3209     }
3210
3211     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
3212         av_buffer_unref(&s->vps_list[i]);
3213     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
3214         av_buffer_unref(&s->sps_list[i]);
3215     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
3216         av_buffer_unref(&s->pps_list[i]);
3217     s->sps = NULL;
3218     s->pps = NULL;
3219     s->vps = NULL;
3220
3221     av_buffer_unref(&s->current_sps);
3222
3223     av_freep(&s->sh.entry_point_offset);
3224     av_freep(&s->sh.offset);
3225     av_freep(&s->sh.size);
3226
3227     for (i = 1; i < s->threads_number; i++) {
3228         HEVCLocalContext *lc = s->HEVClcList[i];
3229         if (lc) {
3230             av_freep(&s->HEVClcList[i]);
3231             av_freep(&s->sList[i]);
3232         }
3233     }
3234     if (s->HEVClc == s->HEVClcList[0])
3235         s->HEVClc = NULL;
3236     av_freep(&s->HEVClcList[0]);
3237
3238     for (i = 0; i < s->nals_allocated; i++)
3239         av_freep(&s->nals[i].rbsp_buffer);
3240     av_freep(&s->nals);
3241     s->nals_allocated = 0;
3242
3243     return 0;
3244 }
3245
3246 static av_cold int hevc_init_context(AVCodecContext *avctx)
3247 {
3248     HEVCContext *s = avctx->priv_data;
3249     int i;
3250
3251     s->avctx = avctx;
3252
3253     s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3254     if (!s->HEVClc)
3255         goto fail;
3256     s->HEVClcList[0] = s->HEVClc;
3257     s->sList[0] = s;
3258
3259     s->cabac_state = av_malloc(HEVC_CONTEXTS);
3260     if (!s->cabac_state)
3261         goto fail;
3262
3263     s->output_frame = av_frame_alloc();
3264     if (!s->output_frame)
3265         goto fail;
3266
3267     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3268         s->DPB[i].frame = av_frame_alloc();
3269         if (!s->DPB[i].frame)
3270             goto fail;
3271         s->DPB[i].tf.f = s->DPB[i].frame;
3272     }
3273
3274     s->max_ra = INT_MAX;
3275
3276     s->md5_ctx = av_md5_alloc();
3277     if (!s->md5_ctx)
3278         goto fail;
3279
3280     ff_bswapdsp_init(&s->bdsp);
3281
3282     s->context_initialized = 1;
3283     s->eos = 0;
3284
3285     return 0;
3286
3287 fail:
3288     hevc_decode_free(avctx);
3289     return AVERROR(ENOMEM);
3290 }
3291
3292 static int hevc_update_thread_context(AVCodecContext *dst,
3293                                       const AVCodecContext *src)
3294 {
3295     HEVCContext *s  = dst->priv_data;
3296     HEVCContext *s0 = src->priv_data;
3297     int i, ret;
3298
3299     if (!s->context_initialized) {
3300         ret = hevc_init_context(dst);
3301         if (ret < 0)
3302             return ret;
3303     }
3304
3305     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3306         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3307         if (s0->DPB[i].frame->buf[0]) {
3308             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3309             if (ret < 0)
3310                 return ret;
3311         }
3312     }
3313
3314     if (s->sps != s0->sps)
3315         s->sps = NULL;
3316     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3317         av_buffer_unref(&s->vps_list[i]);
3318         if (s0->vps_list[i]) {
3319             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3320             if (!s->vps_list[i])
3321                 return AVERROR(ENOMEM);
3322         }
3323     }
3324
3325     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3326         av_buffer_unref(&s->sps_list[i]);
3327         if (s0->sps_list[i]) {
3328             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3329             if (!s->sps_list[i])
3330                 return AVERROR(ENOMEM);
3331         }
3332     }
3333
3334     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3335         av_buffer_unref(&s->pps_list[i]);
3336         if (s0->pps_list[i]) {
3337             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3338             if (!s->pps_list[i])
3339                 return AVERROR(ENOMEM);
3340         }
3341     }
3342
3343     av_buffer_unref(&s->current_sps);
3344     if (s0->current_sps) {
3345         s->current_sps = av_buffer_ref(s0->current_sps);
3346         if (!s->current_sps)
3347             return AVERROR(ENOMEM);
3348     }
3349
3350     if (s->sps != s0->sps)
3351         if ((ret = set_sps(s, s0->sps, src->pix_fmt)) < 0)
3352             return ret;
3353
3354     s->seq_decode = s0->seq_decode;
3355     s->seq_output = s0->seq_output;
3356     s->pocTid0    = s0->pocTid0;
3357     s->max_ra     = s0->max_ra;
3358     s->eos        = s0->eos;
3359
3360     s->is_nalff        = s0->is_nalff;
3361     s->nal_length_size = s0->nal_length_size;
3362
3363     s->threads_number      = s0->threads_number;
3364     s->threads_type        = s0->threads_type;
3365
3366     if (s0->eos) {
3367         s->seq_decode = (s->seq_decode + 1) & 0xff;
3368         s->max_ra = INT_MAX;
3369     }
3370
3371     return 0;
3372 }
3373
3374 static int hevc_decode_extradata(HEVCContext *s)
3375 {
3376     AVCodecContext *avctx = s->avctx;
3377     GetByteContext gb;
3378     int ret, i;
3379
3380     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3381
3382     if (avctx->extradata_size > 3 &&
3383         (avctx->extradata[0] || avctx->extradata[1] ||
3384          avctx->extradata[2] > 1)) {
3385         /* It seems the extradata is encoded as hvcC format.
3386          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3387          * is finalized. When finalized, configurationVersion will be 1 and we
3388          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3389         int i, j, num_arrays, nal_len_size;
3390
3391         s->is_nalff = 1;
3392
3393         bytestream2_skip(&gb, 21);
3394         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3395         num_arrays   = bytestream2_get_byte(&gb);
3396
3397         /* nal units in the hvcC always have length coded with 2 bytes,
3398          * so put a fake nal_length_size = 2 while parsing them */
3399         s->nal_length_size = 2;
3400
3401         /* Decode nal units from hvcC. */
3402         for (i = 0; i < num_arrays; i++) {
3403             int type = bytestream2_get_byte(&gb) & 0x3f;
3404             int cnt  = bytestream2_get_be16(&gb);
3405
3406             for (j = 0; j < cnt; j++) {
3407                 // +2 for the nal size field
3408                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3409                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3410                     av_log(s->avctx, AV_LOG_ERROR,
3411                            "Invalid NAL unit size in extradata.\n");
3412                     return AVERROR_INVALIDDATA;
3413                 }
3414
3415                 ret = decode_nal_units(s, gb.buffer, nalsize);
3416                 if (ret < 0) {
3417                     av_log(avctx, AV_LOG_ERROR,
3418                            "Decoding nal unit %d %d from hvcC failed\n",
3419                            type, i);
3420                     return ret;
3421                 }
3422                 bytestream2_skip(&gb, nalsize);
3423             }
3424         }
3425
3426         /* Now store right nal length size, that will be used to parse
3427          * all other nals */
3428         s->nal_length_size = nal_len_size;
3429     } else {
3430         s->is_nalff = 0;
3431         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3432         if (ret < 0)
3433             return ret;
3434     }
3435
3436     /* export stream parameters from the first SPS */
3437     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3438         if (s->sps_list[i]) {
3439             const HEVCSPS *sps = (const HEVCSPS*)s->sps_list[i]->data;
3440             export_stream_params(s->avctx, s, sps);
3441             break;
3442         }
3443     }
3444
3445     return 0;
3446 }
3447
3448 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3449 {
3450     HEVCContext *s = avctx->priv_data;
3451     int ret;
3452
3453     ff_init_cabac_states();
3454
3455     avctx->internal->allocate_progress = 1;
3456
3457     ret = hevc_init_context(avctx);
3458     if (ret < 0)
3459         return ret;
3460
3461     s->enable_parallel_tiles = 0;
3462     s->picture_struct = 0;
3463
3464     if(avctx->active_thread_type & FF_THREAD_SLICE)
3465         s->threads_number = avctx->thread_count;
3466     else
3467         s->threads_number = 1;
3468
3469     if (avctx->extradata_size > 0 && avctx->extradata) {
3470         ret = hevc_decode_extradata(s);
3471         if (ret < 0) {
3472             hevc_decode_free(avctx);
3473             return ret;
3474         }
3475     }
3476
3477     if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3478             s->threads_type = FF_THREAD_FRAME;
3479         else
3480             s->threads_type = FF_THREAD_SLICE;
3481
3482     return 0;
3483 }
3484
3485 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3486 {
3487     HEVCContext *s = avctx->priv_data;
3488     int ret;
3489
3490     memset(s, 0, sizeof(*s));
3491
3492     ret = hevc_init_context(avctx);
3493     if (ret < 0)
3494         return ret;
3495
3496     return 0;
3497 }
3498
3499 static void hevc_decode_flush(AVCodecContext *avctx)
3500 {
3501     HEVCContext *s = avctx->priv_data;
3502     ff_hevc_flush_dpb(s);
3503     s->max_ra = INT_MAX;
3504 }
3505
3506 #define OFFSET(x) offsetof(HEVCContext, x)
3507 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3508
3509 static const AVProfile profiles[] = {
3510     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3511     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3512     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3513     { FF_PROFILE_HEVC_REXT,                 "Rext"  },
3514     { FF_PROFILE_UNKNOWN },
3515 };
3516
3517 static const AVOption options[] = {
3518     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3519         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3520     { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3521         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3522     { NULL },
3523 };
3524
3525 static const AVClass hevc_decoder_class = {
3526     .class_name = "HEVC decoder",
3527     .item_name  = av_default_item_name,
3528     .option     = options,
3529     .version    = LIBAVUTIL_VERSION_INT,
3530 };
3531
3532 AVCodec ff_hevc_decoder = {
3533     .name                  = "hevc",
3534     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3535     .type                  = AVMEDIA_TYPE_VIDEO,
3536     .id                    = AV_CODEC_ID_HEVC,
3537     .priv_data_size        = sizeof(HEVCContext),
3538     .priv_class            = &hevc_decoder_class,
3539     .init                  = hevc_decode_init,
3540     .close                 = hevc_decode_free,
3541     .decode                = hevc_decode_frame,
3542     .flush                 = hevc_decode_flush,
3543     .update_thread_context = hevc_update_thread_context,
3544     .init_thread_copy      = hevc_init_thread_copy,
3545     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3546                              CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
3547     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
3548 };