git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c

   1 /*
   2  * HEVC video Decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of FFmpeg.
  10  *
  11  * FFmpeg is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * FFmpeg is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with FFmpeg; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/atomic.h"
  27 #include "libavutil/attributes.h"
  28 #include "libavutil/common.h"
  29 #include "libavutil/display.h"
  30 #include "libavutil/internal.h"
  31 #include "libavutil/md5.h"
  32 #include "libavutil/opt.h"
  33 #include "libavutil/pixdesc.h"
  34 #include "libavutil/stereo3d.h"
  35
  36 #include "bswapdsp.h"
  37 #include "bytestream.h"
  38 #include "cabac_functions.h"
  39 #include "golomb.h"
  40 #include "hevc.h"
  41
  42 const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
  43
  44 /**
  45  * NOTE: Each function hls_foo correspond to the function foo in the
  46  * specification (HLS stands for High Level Syntax).
  47  */
  48
  49 /**
  50  * Section 5.7
  51  */
  52
  53 /* free everything allocated  by pic_arrays_init() */
  54 static void pic_arrays_free(HEVCContext *s)
  55 {
  56     av_freep(&s->sao);
  57     av_freep(&s->deblock);
  58
  59     av_freep(&s->skip_flag);
  60     av_freep(&s->tab_ct_depth);
  61
  62     av_freep(&s->tab_ipm);
  63     av_freep(&s->cbf_luma);
  64     av_freep(&s->is_pcm);
  65
  66     av_freep(&s->qp_y_tab);
  67     av_freep(&s->tab_slice_address);
  68     av_freep(&s->filter_slice_edges);
  69
  70     av_freep(&s->horizontal_bs);
  71     av_freep(&s->vertical_bs);
  72
  73     av_freep(&s->sh.entry_point_offset);
  74     av_freep(&s->sh.size);
  75     av_freep(&s->sh.offset);
  76
  77     av_buffer_pool_uninit(&s->tab_mvf_pool);
  78     av_buffer_pool_uninit(&s->rpl_tab_pool);
  79 }
  80
  81 /* allocate arrays that depend on frame dimensions */
  82 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
  83 {
  84     int log2_min_cb_size = sps->log2_min_cb_size;
  85     int width            = sps->width;
  86     int height           = sps->height;
  87     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
  88                            ((height >> log2_min_cb_size) + 1);
  89     int ctb_count        = sps->ctb_width * sps->ctb_height;
  90     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
  91
  92     s->bs_width  = (width  >> 2) + 1;
  93     s->bs_height = (height >> 2) + 1;
  94
  95     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
  96     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
  97     if (!s->sao || !s->deblock)
  98         goto fail;
  99
 100     s->skip_flag    = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
 101     s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
 102     if (!s->skip_flag || !s->tab_ct_depth)
 103         goto fail;
 104
 105     s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
 106     s->tab_ipm  = av_mallocz(min_pu_size);
 107     s->is_pcm   = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
 108     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 109         goto fail;
 110
 111     s->filter_slice_edges = av_mallocz(ctb_count);
 112     s->tab_slice_address  = av_malloc_array(pic_size_in_ctb,
 113                                       sizeof(*s->tab_slice_address));
 114     s->qp_y_tab           = av_malloc_array(pic_size_in_ctb,
 115                                       sizeof(*s->qp_y_tab));
 116     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 117         goto fail;
 118
 119     s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
 120     s->vertical_bs   = av_mallocz_array(s->bs_width, s->bs_height);
 121     if (!s->horizontal_bs || !s->vertical_bs)
 122         goto fail;
 123
 124     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 125                                           av_buffer_allocz);
 126     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 127                                           av_buffer_allocz);
 128     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 129         goto fail;
 130
 131     return 0;
 132
 133 fail:
 134     pic_arrays_free(s);
 135     return AVERROR(ENOMEM);
 136 }
 137
 138 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 139 {
 140     int i = 0;
 141     int j = 0;
 142     uint8_t luma_weight_l0_flag[16];
 143     uint8_t chroma_weight_l0_flag[16];
 144     uint8_t luma_weight_l1_flag[16];
 145     uint8_t chroma_weight_l1_flag[16];
 146     int luma_log2_weight_denom;
 147
 148     luma_log2_weight_denom = get_ue_golomb_long(gb);
 149     if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7)
 150         av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
 151     s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
 152     if (s->sps->chroma_format_idc != 0) {
 153         int delta = get_se_golomb(gb);
 154         s->sh.chroma_log2_weight_denom = av_clip_uintp2(s->sh.luma_log2_weight_denom + delta, 3);
 155     }
 156
 157     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 158         luma_weight_l0_flag[i] = get_bits1(gb);
 159         if (!luma_weight_l0_flag[i]) {
 160             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 161             s->sh.luma_offset_l0[i] = 0;
 162         }
 163     }
 164     if (s->sps->chroma_format_idc != 0) {
 165         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 166             chroma_weight_l0_flag[i] = get_bits1(gb);
 167     } else {
 168         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 169             chroma_weight_l0_flag[i] = 0;
 170     }
 171     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 172         if (luma_weight_l0_flag[i]) {
 173             int delta_luma_weight_l0 = get_se_golomb(gb);
 174             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 175             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 176         }
 177         if (chroma_weight_l0_flag[i]) {
 178             for (j = 0; j < 2; j++) {
 179                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 180                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 181                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 182                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 183                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 184             }
 185         } else {
 186             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 187             s->sh.chroma_offset_l0[i][0] = 0;
 188             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 189             s->sh.chroma_offset_l0[i][1] = 0;
 190         }
 191     }
 192     if (s->sh.slice_type == B_SLICE) {
 193         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 194             luma_weight_l1_flag[i] = get_bits1(gb);
 195             if (!luma_weight_l1_flag[i]) {
 196                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 197                 s->sh.luma_offset_l1[i] = 0;
 198             }
 199         }
 200         if (s->sps->chroma_format_idc != 0) {
 201             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 202                 chroma_weight_l1_flag[i] = get_bits1(gb);
 203         } else {
 204             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 205                 chroma_weight_l1_flag[i] = 0;
 206         }
 207         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 208             if (luma_weight_l1_flag[i]) {
 209                 int delta_luma_weight_l1 = get_se_golomb(gb);
 210                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 211                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 212             }
 213             if (chroma_weight_l1_flag[i]) {
 214                 for (j = 0; j < 2; j++) {
 215                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 216                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 217                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 218                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 219                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 220                 }
 221             } else {
 222                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 223                 s->sh.chroma_offset_l1[i][0] = 0;
 224                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 225                 s->sh.chroma_offset_l1[i][1] = 0;
 226             }
 227         }
 228     }
 229 }
 230
 231 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 232 {
 233     const HEVCSPS *sps = s->sps;
 234     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 235     int prev_delta_msb = 0;
 236     unsigned int nb_sps = 0, nb_sh;
 237     int i;
 238
 239     rps->nb_refs = 0;
 240     if (!sps->long_term_ref_pics_present_flag)
 241         return 0;
 242
 243     if (sps->num_long_term_ref_pics_sps > 0)
 244         nb_sps = get_ue_golomb_long(gb);
 245     nb_sh = get_ue_golomb_long(gb);
 246
 247     if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
 248         return AVERROR_INVALIDDATA;
 249
 250     rps->nb_refs = nb_sh + nb_sps;
 251
 252     for (i = 0; i < rps->nb_refs; i++) {
 253         uint8_t delta_poc_msb_present;
 254
 255         if (i < nb_sps) {
 256             uint8_t lt_idx_sps = 0;
 257
 258             if (sps->num_long_term_ref_pics_sps > 1)
 259                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 260
 261             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 262             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 263         } else {
 264             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 265             rps->used[i] = get_bits1(gb);
 266         }
 267
 268         delta_poc_msb_present = get_bits1(gb);
 269         if (delta_poc_msb_present) {
 270             int delta = get_ue_golomb_long(gb);
 271
 272             if (i && i != nb_sps)
 273                 delta += prev_delta_msb;
 274
 275             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 276             prev_delta_msb = delta;
 277         }
 278     }
 279
 280     return 0;
 281 }
 282
 283 static void export_stream_params(AVCodecContext *avctx,
 284                                  const HEVCContext *s, const HEVCSPS *sps)
 285 {
 286     const HEVCVPS *vps = (const HEVCVPS*)s->vps_list[sps->vps_id]->data;
 287     unsigned int num = 0, den = 0;
 288
 289     avctx->pix_fmt             = sps->pix_fmt;
 290     avctx->coded_width         = sps->width;
 291     avctx->coded_height        = sps->height;
 292     avctx->width               = sps->output_width;
 293     avctx->height              = sps->output_height;
 294     avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 295     avctx->profile             = sps->ptl.general_ptl.profile_idc;
 296     avctx->level               = sps->ptl.general_ptl.level_idc;
 297
 298     ff_set_sar(avctx, sps->vui.sar);
 299
 300     if (sps->vui.video_signal_type_present_flag)
 301         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 302                                                             : AVCOL_RANGE_MPEG;
 303     else
 304         avctx->color_range = AVCOL_RANGE_MPEG;
 305
 306     if (sps->vui.colour_description_present_flag) {
 307         avctx->color_primaries = sps->vui.colour_primaries;
 308         avctx->color_trc       = sps->vui.transfer_characteristic;
 309         avctx->colorspace      = sps->vui.matrix_coeffs;
 310     } else {
 311         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 312         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 313         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 314     }
 315
 316     if (vps->vps_timing_info_present_flag) {
 317         num = vps->vps_num_units_in_tick;
 318         den = vps->vps_time_scale;
 319     } else if (sps->vui.vui_timing_info_present_flag) {
 320         num = sps->vui.vui_num_units_in_tick;
 321         den = sps->vui.vui_time_scale;
 322     }
 323
 324     if (num != 0 && den != 0)
 325         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
 326                   num, den, 1 << 30);
 327 }
 328
 329 static int set_sps(HEVCContext *s, const HEVCSPS *sps, enum AVPixelFormat pix_fmt)
 330 {
 331     #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL)
 332     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
 333     int ret, i;
 334
 335     export_stream_params(s->avctx, s, sps);
 336
 337     pic_arrays_free(s);
 338     ret = pic_arrays_init(s, sps);
 339     if (ret < 0)
 340         goto fail;
 341
 342     if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
 343 #if CONFIG_HEVC_DXVA2_HWACCEL
 344         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
 345 #endif
 346 #if CONFIG_HEVC_D3D11VA_HWACCEL
 347         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
 348 #endif
 349     }
 350
 351     if (pix_fmt == AV_PIX_FMT_NONE) {
 352         *fmt++ = sps->pix_fmt;
 353         *fmt = AV_PIX_FMT_NONE;
 354
 355         ret = ff_thread_get_format(s->avctx, pix_fmts);
 356         if (ret < 0)
 357             goto fail;
 358         s->avctx->pix_fmt = ret;
 359     }
 360     else {
 361         s->avctx->pix_fmt = pix_fmt;
 362     }
 363
 364     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 365     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 366     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 367
 368     for (i = 0; i < 3; i++) {
 369         av_freep(&s->sao_pixel_buffer_h[i]);
 370         av_freep(&s->sao_pixel_buffer_v[i]);
 371     }
 372
 373     if (sps->sao_enabled && !s->avctx->hwaccel) {
 374         int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
 375         int c_idx;
 376
 377         for(c_idx = 0; c_idx < c_count; c_idx++) {
 378             int w = sps->width >> sps->hshift[c_idx];
 379             int h = sps->height >> sps->vshift[c_idx];
 380             s->sao_pixel_buffer_h[c_idx] =
 381                 av_malloc((w * 2 * sps->ctb_height) <<
 382                           sps->pixel_shift);
 383             s->sao_pixel_buffer_v[c_idx] =
 384                 av_malloc((h * 2 * sps->ctb_width) <<
 385                           sps->pixel_shift);
 386         }
 387     }
 388
 389     s->sps = sps;
 390     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
 391
 392     return 0;
 393
 394 fail:
 395     pic_arrays_free(s);
 396     s->sps = NULL;
 397     return ret;
 398 }
 399
 400 static int hls_slice_header(HEVCContext *s)
 401 {
 402     GetBitContext *gb = &s->HEVClc->gb;
 403     SliceHeader *sh   = &s->sh;
 404     int i, ret;
 405
 406     // Coded parameters
 407     sh->first_slice_in_pic_flag = get_bits1(gb);
 408     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 409         s->seq_decode = (s->seq_decode + 1) & 0xff;
 410         s->max_ra     = INT_MAX;
 411         if (IS_IDR(s))
 412             ff_hevc_clear_refs(s);
 413     }
 414     sh->no_output_of_prior_pics_flag = 0;
 415     if (IS_IRAP(s))
 416         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 417
 418     sh->pps_id = get_ue_golomb_long(gb);
 419     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
 420         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 421         return AVERROR_INVALIDDATA;
 422     }
 423     if (!sh->first_slice_in_pic_flag &&
 424         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
 425         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 426         return AVERROR_INVALIDDATA;
 427     }
 428     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
 429     if (s->nal_unit_type == NAL_CRA_NUT && s->last_eos == 1)
 430         sh->no_output_of_prior_pics_flag = 1;
 431
 432     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
 433         const HEVCSPS* last_sps = s->sps;
 434         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
 435         if (last_sps && IS_IRAP(s) && s->nal_unit_type != NAL_CRA_NUT) {
 436             if (s->sps->width !=  last_sps->width || s->sps->height != last_sps->height ||
 437                 s->sps->temporal_layer[s->sps->max_sub_layers - 1].max_dec_pic_buffering !=
 438                 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
 439                 sh->no_output_of_prior_pics_flag = 0;
 440         }
 441         ff_hevc_clear_refs(s);
 442         ret = set_sps(s, s->sps, AV_PIX_FMT_NONE);
 443         if (ret < 0)
 444             return ret;
 445
 446         s->seq_decode = (s->seq_decode + 1) & 0xff;
 447         s->max_ra     = INT_MAX;
 448     }
 449
 450     sh->dependent_slice_segment_flag = 0;
 451     if (!sh->first_slice_in_pic_flag) {
 452         int slice_address_length;
 453
 454         if (s->pps->dependent_slice_segments_enabled_flag)
 455             sh->dependent_slice_segment_flag = get_bits1(gb);
 456
 457         slice_address_length = av_ceil_log2(s->sps->ctb_width *
 458                                             s->sps->ctb_height);
 459         sh->slice_segment_addr = get_bits(gb, slice_address_length);
 460         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
 461             av_log(s->avctx, AV_LOG_ERROR,
 462                    "Invalid slice segment address: %u.\n",
 463                    sh->slice_segment_addr);
 464             return AVERROR_INVALIDDATA;
 465         }
 466
 467         if (!sh->dependent_slice_segment_flag) {
 468             sh->slice_addr = sh->slice_segment_addr;
 469             s->slice_idx++;
 470         }
 471     } else {
 472         sh->slice_segment_addr = sh->slice_addr = 0;
 473         s->slice_idx           = 0;
 474         s->slice_initialized   = 0;
 475     }
 476
 477     if (!sh->dependent_slice_segment_flag) {
 478         s->slice_initialized = 0;
 479
 480         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
 481             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 482
 483         sh->slice_type = get_ue_golomb_long(gb);
 484         if (!(sh->slice_type == I_SLICE ||
 485               sh->slice_type == P_SLICE ||
 486               sh->slice_type == B_SLICE)) {
 487             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 488                    sh->slice_type);
 489             return AVERROR_INVALIDDATA;
 490         }
 491         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
 492             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 493             return AVERROR_INVALIDDATA;
 494         }
 495
 496         // when flag is not present, picture is inferred to be output
 497         sh->pic_output_flag = 1;
 498         if (s->pps->output_flag_present_flag)
 499             sh->pic_output_flag = get_bits1(gb);
 500
 501         if (s->sps->separate_colour_plane_flag)
 502             sh->colour_plane_id = get_bits(gb, 2);
 503
 504         if (!IS_IDR(s)) {
 505             int poc;
 506
 507             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
 508             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
 509             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 510                 av_log(s->avctx, AV_LOG_WARNING,
 511                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 512                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 513                     return AVERROR_INVALIDDATA;
 514                 poc = s->poc;
 515             }
 516             s->poc = poc;
 517
 518             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
 519             if (!sh->short_term_ref_pic_set_sps_flag) {
 520                 int pos = get_bits_left(gb);
 521                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
 522                 if (ret < 0)
 523                     return ret;
 524
 525                 sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
 526                 sh->short_term_rps = &sh->slice_rps;
 527             } else {
 528                 int numbits, rps_idx;
 529
 530                 if (!s->sps->nb_st_rps) {
 531                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 532                     return AVERROR_INVALIDDATA;
 533                 }
 534
 535                 numbits = av_ceil_log2(s->sps->nb_st_rps);
 536                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 537                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
 538             }
 539
 540             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 541             if (ret < 0) {
 542                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 543                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 544                     return AVERROR_INVALIDDATA;
 545             }
 546
 547             if (s->sps->sps_temporal_mvp_enabled_flag)
 548                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 549             else
 550                 sh->slice_temporal_mvp_enabled_flag = 0;
 551         } else {
 552             s->sh.short_term_rps = NULL;
 553             s->poc               = 0;
 554         }
 555
 556         /* 8.3.1 */
 557         if (s->temporal_id == 0 &&
 558             s->nal_unit_type != NAL_TRAIL_N &&
 559             s->nal_unit_type != NAL_TSA_N   &&
 560             s->nal_unit_type != NAL_STSA_N  &&
 561             s->nal_unit_type != NAL_RADL_N  &&
 562             s->nal_unit_type != NAL_RADL_R  &&
 563             s->nal_unit_type != NAL_RASL_N  &&
 564             s->nal_unit_type != NAL_RASL_R)
 565             s->pocTid0 = s->poc;
 566
 567         if (s->sps->sao_enabled) {
 568             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 569             if (s->sps->chroma_format_idc) {
 570                 sh->slice_sample_adaptive_offset_flag[1] =
 571                 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 572             }
 573         } else {
 574             sh->slice_sample_adaptive_offset_flag[0] = 0;
 575             sh->slice_sample_adaptive_offset_flag[1] = 0;
 576             sh->slice_sample_adaptive_offset_flag[2] = 0;
 577         }
 578
 579         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 580         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
 581             int nb_refs;
 582
 583             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
 584             if (sh->slice_type == B_SLICE)
 585                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
 586
 587             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 588                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 589                 if (sh->slice_type == B_SLICE)
 590                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 591             }
 592             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
 593                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 594                        sh->nb_refs[L0], sh->nb_refs[L1]);
 595                 return AVERROR_INVALIDDATA;
 596             }
 597
 598             sh->rpl_modification_flag[0] = 0;
 599             sh->rpl_modification_flag[1] = 0;
 600             nb_refs = ff_hevc_frame_nb_refs(s);
 601             if (!nb_refs) {
 602                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 603                 return AVERROR_INVALIDDATA;
 604             }
 605
 606             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
 607                 sh->rpl_modification_flag[0] = get_bits1(gb);
 608                 if (sh->rpl_modification_flag[0]) {
 609                     for (i = 0; i < sh->nb_refs[L0]; i++)
 610                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 611                 }
 612
 613                 if (sh->slice_type == B_SLICE) {
 614                     sh->rpl_modification_flag[1] = get_bits1(gb);
 615                     if (sh->rpl_modification_flag[1] == 1)
 616                         for (i = 0; i < sh->nb_refs[L1]; i++)
 617                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 618                 }
 619             }
 620
 621             if (sh->slice_type == B_SLICE)
 622                 sh->mvd_l1_zero_flag = get_bits1(gb);
 623
 624             if (s->pps->cabac_init_present_flag)
 625                 sh->cabac_init_flag = get_bits1(gb);
 626             else
 627                 sh->cabac_init_flag = 0;
 628
 629             sh->collocated_ref_idx = 0;
 630             if (sh->slice_temporal_mvp_enabled_flag) {
 631                 sh->collocated_list = L0;
 632                 if (sh->slice_type == B_SLICE)
 633                     sh->collocated_list = !get_bits1(gb);
 634
 635                 if (sh->nb_refs[sh->collocated_list] > 1) {
 636                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 637                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 638                         av_log(s->avctx, AV_LOG_ERROR,
 639                                "Invalid collocated_ref_idx: %d.\n",
 640                                sh->collocated_ref_idx);
 641                         return AVERROR_INVALIDDATA;
 642                     }
 643                 }
 644             }
 645
 646             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
 647                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
 648                 pred_weight_table(s, gb);
 649             }
 650
 651             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 652             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 653                 av_log(s->avctx, AV_LOG_ERROR,
 654                        "Invalid number of merging MVP candidates: %d.\n",
 655                        sh->max_num_merge_cand);
 656                 return AVERROR_INVALIDDATA;
 657             }
 658         }
 659
 660         sh->slice_qp_delta = get_se_golomb(gb);
 661
 662         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 663             sh->slice_cb_qp_offset = get_se_golomb(gb);
 664             sh->slice_cr_qp_offset = get_se_golomb(gb);
 665         } else {
 666             sh->slice_cb_qp_offset = 0;
 667             sh->slice_cr_qp_offset = 0;
 668         }
 669
 670         if (s->pps->chroma_qp_offset_list_enabled_flag)
 671             sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
 672         else
 673             sh->cu_chroma_qp_offset_enabled_flag = 0;
 674
 675         if (s->pps->deblocking_filter_control_present_flag) {
 676             int deblocking_filter_override_flag = 0;
 677
 678             if (s->pps->deblocking_filter_override_enabled_flag)
 679                 deblocking_filter_override_flag = get_bits1(gb);
 680
 681             if (deblocking_filter_override_flag) {
 682                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 683                 if (!sh->disable_deblocking_filter_flag) {
 684                     sh->beta_offset = get_se_golomb(gb) * 2;
 685                     sh->tc_offset   = get_se_golomb(gb) * 2;
 686                 }
 687             } else {
 688                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
 689                 sh->beta_offset                    = s->pps->beta_offset;
 690                 sh->tc_offset                      = s->pps->tc_offset;
 691             }
 692         } else {
 693             sh->disable_deblocking_filter_flag = 0;
 694             sh->beta_offset                    = 0;
 695             sh->tc_offset                      = 0;
 696         }
 697
 698         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
 699             (sh->slice_sample_adaptive_offset_flag[0] ||
 700              sh->slice_sample_adaptive_offset_flag[1] ||
 701              !sh->disable_deblocking_filter_flag)) {
 702             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 703         } else {
 704             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
 705         }
 706     } else if (!s->slice_initialized) {
 707         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 708         return AVERROR_INVALIDDATA;
 709     }
 710
 711     sh->num_entry_point_offsets = 0;
 712     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
 713         unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
 714         // It would be possible to bound this tighter but this here is simpler
 715         if (num_entry_point_offsets > get_bits_left(gb)) {
 716             av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
 717             return AVERROR_INVALIDDATA;
 718         }
 719
 720         sh->num_entry_point_offsets = num_entry_point_offsets;
 721         if (sh->num_entry_point_offsets > 0) {
 722             int offset_len = get_ue_golomb_long(gb) + 1;
 723
 724             if (offset_len < 1 || offset_len > 32) {
 725                 sh->num_entry_point_offsets = 0;
 726                 av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
 727                 return AVERROR_INVALIDDATA;
 728             }
 729
 730             av_freep(&sh->entry_point_offset);
 731             av_freep(&sh->offset);
 732             av_freep(&sh->size);
 733             sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 734             sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 735             sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 736             if (!sh->entry_point_offset || !sh->offset || !sh->size) {
 737                 sh->num_entry_point_offsets = 0;
 738                 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
 739                 return AVERROR(ENOMEM);
 740             }
 741             for (i = 0; i < sh->num_entry_point_offsets; i++) {
 742                 unsigned val = get_bits_long(gb, offset_len);
 743                 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
 744             }
 745             if (s->threads_number > 1 && (s->pps->num_tile_rows > 1 || s->pps->num_tile_columns > 1)) {
 746                 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
 747                 s->threads_number = 1;
 748             } else
 749                 s->enable_parallel_tiles = 0;
 750         } else
 751             s->enable_parallel_tiles = 0;
 752     }
 753
 754     if (s->pps->slice_header_extension_present_flag) {
 755         unsigned int length = get_ue_golomb_long(gb);
 756         if (length*8LL > get_bits_left(gb)) {
 757             av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
 758             return AVERROR_INVALIDDATA;
 759         }
 760         for (i = 0; i < length; i++)
 761             skip_bits(gb, 8);  // slice_header_extension_data_byte
 762     }
 763
 764     // Inferred parameters
 765     sh->slice_qp = 26U + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 766     if (sh->slice_qp > 51 ||
 767         sh->slice_qp < -s->sps->qp_bd_offset) {
 768         av_log(s->avctx, AV_LOG_ERROR,
 769                "The slice_qp %d is outside the valid range "
 770                "[%d, 51].\n",
 771                sh->slice_qp,
 772                -s->sps->qp_bd_offset);
 773         return AVERROR_INVALIDDATA;
 774     }
 775
 776     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 777
 778     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
 779         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
 780         return AVERROR_INVALIDDATA;
 781     }
 782
 783     if (get_bits_left(gb) < 0) {
 784         av_log(s->avctx, AV_LOG_ERROR,
 785                "Overread slice header by %d bits\n", -get_bits_left(gb));
 786         return AVERROR_INVALIDDATA;
 787     }
 788
 789     s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
 790
 791     if (!s->pps->cu_qp_delta_enabled_flag)
 792         s->HEVClc->qp_y = s->sh.slice_qp;
 793
 794     s->slice_initialized = 1;
 795     s->HEVClc->tu.cu_qp_offset_cb = 0;
 796     s->HEVClc->tu.cu_qp_offset_cr = 0;
 797
 798     return 0;
 799 }
 800
 801 #define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
 802
 803 #define SET_SAO(elem, value)                            \
 804 do {                                                    \
 805     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 806         sao->elem = value;                              \
 807     else if (sao_merge_left_flag)                       \
 808         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 809     else if (sao_merge_up_flag)                         \
 810         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 811     else                                                \
 812         sao->elem = 0;                                  \
 813 } while (0)
 814
 815 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 816 {
 817     HEVCLocalContext *lc    = s->HEVClc;
 818     int sao_merge_left_flag = 0;
 819     int sao_merge_up_flag   = 0;
 820     SAOParams *sao          = &CTB(s->sao, rx, ry);
 821     int c_idx, i;
 822
 823     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 824         s->sh.slice_sample_adaptive_offset_flag[1]) {
 825         if (rx > 0) {
 826             if (lc->ctb_left_flag)
 827                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 828         }
 829         if (ry > 0 && !sao_merge_left_flag) {
 830             if (lc->ctb_up_flag)
 831                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 832         }
 833     }
 834
 835     for (c_idx = 0; c_idx < (s->sps->chroma_format_idc ? 3 : 1); c_idx++) {
 836         int log2_sao_offset_scale = c_idx == 0 ? s->pps->log2_sao_offset_scale_luma :
 837                                                  s->pps->log2_sao_offset_scale_chroma;
 838
 839         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 840             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 841             continue;
 842         }
 843
 844         if (c_idx == 2) {
 845             sao->type_idx[2] = sao->type_idx[1];
 846             sao->eo_class[2] = sao->eo_class[1];
 847         } else {
 848             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 849         }
 850
 851         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 852             continue;
 853
 854         for (i = 0; i < 4; i++)
 855             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 856
 857         if (sao->type_idx[c_idx] == SAO_BAND) {
 858             for (i = 0; i < 4; i++) {
 859                 if (sao->offset_abs[c_idx][i]) {
 860                     SET_SAO(offset_sign[c_idx][i],
 861                             ff_hevc_sao_offset_sign_decode(s));
 862                 } else {
 863                     sao->offset_sign[c_idx][i] = 0;
 864                 }
 865             }
 866             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 867         } else if (c_idx != 2) {
 868             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 869         }
 870
 871         // Inferred parameters
 872         sao->offset_val[c_idx][0] = 0;
 873         for (i = 0; i < 4; i++) {
 874             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
 875             if (sao->type_idx[c_idx] == SAO_EDGE) {
 876                 if (i > 1)
 877                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 878             } else if (sao->offset_sign[c_idx][i]) {
 879                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 880             }
 881             sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale;
 882         }
 883     }
 884 }
 885
 886 #undef SET_SAO
 887 #undef CTB
 888
 889 static int hls_cross_component_pred(HEVCContext *s, int idx) {
 890     HEVCLocalContext *lc    = s->HEVClc;
 891     int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
 892
 893     if (log2_res_scale_abs_plus1 !=  0) {
 894         int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
 895         lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
 896                                (1 - 2 * res_scale_sign_flag);
 897     } else {
 898         lc->tu.res_scale_val = 0;
 899     }
 900
 901
 902     return 0;
 903 }
 904
 905 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
 906                               int xBase, int yBase, int cb_xBase, int cb_yBase,
 907                               int log2_cb_size, int log2_trafo_size,
 908                               int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
 909 {
 910     HEVCLocalContext *lc = s->HEVClc;
 911     const int log2_trafo_size_c = log2_trafo_size - s->sps->hshift[1];
 912     int i;
 913
 914     if (lc->cu.pred_mode == MODE_INTRA) {
 915         int trafo_size = 1 << log2_trafo_size;
 916         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
 917
 918         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
 919     }
 920
 921     if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
 922         (s->sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
 923         int scan_idx   = SCAN_DIAG;
 924         int scan_idx_c = SCAN_DIAG;
 925         int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
 926                          (s->sps->chroma_format_idc == 2 &&
 927                          (cbf_cb[1] || cbf_cr[1]));
 928
 929         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
 930             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
 931             if (lc->tu.cu_qp_delta != 0)
 932                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
 933                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
 934             lc->tu.is_cu_qp_delta_coded = 1;
 935
 936             if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
 937                 lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
 938                 av_log(s->avctx, AV_LOG_ERROR,
 939                        "The cu_qp_delta %d is outside the valid range "
 940                        "[%d, %d].\n",
 941                        lc->tu.cu_qp_delta,
 942                        -(26 + s->sps->qp_bd_offset / 2),
 943                         (25 + s->sps->qp_bd_offset / 2));
 944                 return AVERROR_INVALIDDATA;
 945             }
 946
 947             ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
 948         }
 949
 950         if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
 951             !lc->cu.cu_transquant_bypass_flag  &&  !lc->tu.is_cu_chroma_qp_offset_coded) {
 952             int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
 953             if (cu_chroma_qp_offset_flag) {
 954                 int cu_chroma_qp_offset_idx  = 0;
 955                 if (s->pps->chroma_qp_offset_list_len_minus1 > 0) {
 956                     cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
 957                     av_log(s->avctx, AV_LOG_ERROR,
 958                         "cu_chroma_qp_offset_idx not yet tested.\n");
 959                 }
 960                 lc->tu.cu_qp_offset_cb = s->pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
 961                 lc->tu.cu_qp_offset_cr = s->pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
 962             } else {
 963                 lc->tu.cu_qp_offset_cb = 0;
 964                 lc->tu.cu_qp_offset_cr = 0;
 965             }
 966             lc->tu.is_cu_chroma_qp_offset_coded = 1;
 967         }
 968
 969         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
 970             if (lc->tu.intra_pred_mode >= 6 &&
 971                 lc->tu.intra_pred_mode <= 14) {
 972                 scan_idx = SCAN_VERT;
 973             } else if (lc->tu.intra_pred_mode >= 22 &&
 974                        lc->tu.intra_pred_mode <= 30) {
 975                 scan_idx = SCAN_HORIZ;
 976             }
 977
 978             if (lc->tu.intra_pred_mode_c >=  6 &&
 979                 lc->tu.intra_pred_mode_c <= 14) {
 980                 scan_idx_c = SCAN_VERT;
 981             } else if (lc->tu.intra_pred_mode_c >= 22 &&
 982                        lc->tu.intra_pred_mode_c <= 30) {
 983                 scan_idx_c = SCAN_HORIZ;
 984             }
 985         }
 986
 987         lc->tu.cross_pf = 0;
 988
 989         if (cbf_luma)
 990             ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
 991         if (s->sps->chroma_format_idc && (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3)) {
 992             int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
 993             int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
 994             lc->tu.cross_pf  = (s->pps->cross_component_prediction_enabled_flag && cbf_luma &&
 995                                 (lc->cu.pred_mode == MODE_INTER ||
 996                                  (lc->tu.chroma_mode_c ==  4)));
 997
 998             if (lc->tu.cross_pf) {
 999                 hls_cross_component_pred(s, 0);
1000             }
1001             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1002                 if (lc->cu.pred_mode == MODE_INTRA) {
1003                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1004                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
1005                 }
1006                 if (cbf_cb[i])
1007                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1008                                                 log2_trafo_size_c, scan_idx_c, 1);
1009                 else
1010                     if (lc->tu.cross_pf) {
1011                         ptrdiff_t stride = s->frame->linesize[1];
1012                         int hshift = s->sps->hshift[1];
1013                         int vshift = s->sps->vshift[1];
1014                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1015                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1016                         int size = 1 << log2_trafo_size_c;
1017
1018                         uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1019                                                               ((x0 >> hshift) << s->sps->pixel_shift)];
1020                         for (i = 0; i < (size * size); i++) {
1021                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1022                         }
1023                         s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1024                     }
1025             }
1026
1027             if (lc->tu.cross_pf) {
1028                 hls_cross_component_pred(s, 1);
1029             }
1030             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1031                 if (lc->cu.pred_mode == MODE_INTRA) {
1032                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1033                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1034                 }
1035                 if (cbf_cr[i])
1036                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1037                                                 log2_trafo_size_c, scan_idx_c, 2);
1038                 else
1039                     if (lc->tu.cross_pf) {
1040                         ptrdiff_t stride = s->frame->linesize[2];
1041                         int hshift = s->sps->hshift[2];
1042                         int vshift = s->sps->vshift[2];
1043                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1044                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1045                         int size = 1 << log2_trafo_size_c;
1046
1047                         uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1048                                                           ((x0 >> hshift) << s->sps->pixel_shift)];
1049                         for (i = 0; i < (size * size); i++) {
1050                             coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1051                         }
1052                         s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1053                     }
1054             }
1055         } else if (s->sps->chroma_format_idc && blk_idx == 3) {
1056             int trafo_size_h = 1 << (log2_trafo_size + 1);
1057             int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
1058             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1059                 if (lc->cu.pred_mode == MODE_INTRA) {
1060                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1061                                                     trafo_size_h, trafo_size_v);
1062                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1063                 }
1064                 if (cbf_cb[i])
1065                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1066                                                 log2_trafo_size, scan_idx_c, 1);
1067             }
1068             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1069                 if (lc->cu.pred_mode == MODE_INTRA) {
1070                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1071                                                 trafo_size_h, trafo_size_v);
1072                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1073                 }
1074                 if (cbf_cr[i])
1075                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1076                                                 log2_trafo_size, scan_idx_c, 2);
1077             }
1078         }
1079     } else if (s->sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
1080         if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
1081             int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
1082             int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
1083             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1084             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1085             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1086             if (s->sps->chroma_format_idc == 2) {
1087                 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1088                                                 trafo_size_h, trafo_size_v);
1089                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1090                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1091             }
1092         } else if (blk_idx == 3) {
1093             int trafo_size_h = 1 << (log2_trafo_size + 1);
1094             int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
1095             ff_hevc_set_neighbour_available(s, xBase, yBase,
1096                                             trafo_size_h, trafo_size_v);
1097             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1098             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1099             if (s->sps->chroma_format_idc == 2) {
1100                 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1101                                                 trafo_size_h, trafo_size_v);
1102                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1103                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
1104             }
1105         }
1106     }
1107
1108     return 0;
1109 }
1110
1111 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1112 {
1113     int cb_size          = 1 << log2_cb_size;
1114     int log2_min_pu_size = s->sps->log2_min_pu_size;
1115
1116     int min_pu_width     = s->sps->min_pu_width;
1117     int x_end = FFMIN(x0 + cb_size, s->sps->width);
1118     int y_end = FFMIN(y0 + cb_size, s->sps->height);
1119     int i, j;
1120
1121     for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1122         for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1123             s->is_pcm[i + j * min_pu_width] = 2;
1124 }
1125
1126 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1127                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1128                               int log2_cb_size, int log2_trafo_size,
1129                               int trafo_depth, int blk_idx,
1130                               const int *base_cbf_cb, const int *base_cbf_cr)
1131 {
1132     HEVCLocalContext *lc = s->HEVClc;
1133     uint8_t split_transform_flag;
1134     int cbf_cb[2];
1135     int cbf_cr[2];
1136     int ret;
1137
1138     cbf_cb[0] = base_cbf_cb[0];
1139     cbf_cb[1] = base_cbf_cb[1];
1140     cbf_cr[0] = base_cbf_cr[0];
1141     cbf_cr[1] = base_cbf_cr[1];
1142
1143     if (lc->cu.intra_split_flag) {
1144         if (trafo_depth == 1) {
1145             lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[blk_idx];
1146             if (s->sps->chroma_format_idc == 3) {
1147                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1148                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[blk_idx];
1149             } else {
1150                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1151                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1152             }
1153         }
1154     } else {
1155         lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[0];
1156         lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1157         lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1158     }
1159
1160     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1161         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1162         trafo_depth     < lc->cu.max_trafo_depth       &&
1163         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1164         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1165     } else {
1166         int inter_split = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1167                           lc->cu.pred_mode == MODE_INTER &&
1168                           lc->cu.part_mode != PART_2Nx2N &&
1169                           trafo_depth == 0;
1170
1171         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1172                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1173                                inter_split;
1174     }
1175
1176     if (s->sps->chroma_format_idc && (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3)) {
1177         if (trafo_depth == 0 || cbf_cb[0]) {
1178             cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1179             if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1180                 cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1181             }
1182         }
1183
1184         if (trafo_depth == 0 || cbf_cr[0]) {
1185             cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1186             if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1187                 cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1188             }
1189         }
1190     }
1191
1192     if (split_transform_flag) {
1193         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1194         const int x1 = x0 + trafo_size_split;
1195         const int y1 = y0 + trafo_size_split;
1196
1197 #define SUBDIVIDE(x, y, idx)                                                    \
1198 do {                                                                            \
1199     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1200                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1201                              cbf_cb, cbf_cr);                                   \
1202     if (ret < 0)                                                                \
1203         return ret;                                                             \
1204 } while (0)
1205
1206         SUBDIVIDE(x0, y0, 0);
1207         SUBDIVIDE(x1, y0, 1);
1208         SUBDIVIDE(x0, y1, 2);
1209         SUBDIVIDE(x1, y1, 3);
1210
1211 #undef SUBDIVIDE
1212     } else {
1213         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1214         int log2_min_tu_size = s->sps->log2_min_tb_size;
1215         int min_tu_width     = s->sps->min_tb_width;
1216         int cbf_luma         = 1;
1217
1218         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1219             cbf_cb[0] || cbf_cr[0] ||
1220             (s->sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1221             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1222         }
1223
1224         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1225                                  log2_cb_size, log2_trafo_size,
1226                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1227         if (ret < 0)
1228             return ret;
1229         // TODO: store cbf_luma somewhere else
1230         if (cbf_luma) {
1231             int i, j;
1232             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1233                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1234                     int x_tu = (x0 + j) >> log2_min_tu_size;
1235                     int y_tu = (y0 + i) >> log2_min_tu_size;
1236                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1237                 }
1238         }
1239         if (!s->sh.disable_deblocking_filter_flag) {
1240             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1241             if (s->pps->transquant_bypass_enable_flag &&
1242                 lc->cu.cu_transquant_bypass_flag)
1243                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1244         }
1245     }
1246     return 0;
1247 }
1248
1249 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1250 {
1251     HEVCLocalContext *lc = s->HEVClc;
1252     GetBitContext gb;
1253     int cb_size   = 1 << log2_cb_size;
1254     int stride0   = s->frame->linesize[0];
1255     uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1256     int   stride1 = s->frame->linesize[1];
1257     uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1258     int   stride2 = s->frame->linesize[2];
1259     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
1260
1261     int length         = cb_size * cb_size * s->sps->pcm.bit_depth +
1262                          (((cb_size >> s->sps->hshift[1]) * (cb_size >> s->sps->vshift[1])) +
1263                           ((cb_size >> s->sps->hshift[2]) * (cb_size >> s->sps->vshift[2]))) *
1264                           s->sps->pcm.bit_depth_chroma;
1265     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1266     int ret;
1267
1268     if (!s->sh.disable_deblocking_filter_flag)
1269         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1270
1271     ret = init_get_bits(&gb, pcm, length);
1272     if (ret < 0)
1273         return ret;
1274
1275     s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size,     &gb, s->sps->pcm.bit_depth);
1276     if (s->sps->chroma_format_idc) {
1277         s->hevcdsp.put_pcm(dst1, stride1,
1278                            cb_size >> s->sps->hshift[1],
1279                            cb_size >> s->sps->vshift[1],
1280                            &gb, s->sps->pcm.bit_depth_chroma);
1281         s->hevcdsp.put_pcm(dst2, stride2,
1282                            cb_size >> s->sps->hshift[2],
1283                            cb_size >> s->sps->vshift[2],
1284                            &gb, s->sps->pcm.bit_depth_chroma);
1285     }
1286
1287     return 0;
1288 }
1289
1290 /**
1291  * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1292  *
1293  * @param s HEVC decoding context
1294  * @param dst target buffer for block data at block position
1295  * @param dststride stride of the dst buffer
1296  * @param ref reference picture buffer at origin (0, 0)
1297  * @param mv motion vector (relative to block position) to get pixel data from
1298  * @param x_off horizontal position of block from origin (0, 0)
1299  * @param y_off vertical position of block from origin (0, 0)
1300  * @param block_w width of block
1301  * @param block_h height of block
1302  * @param luma_weight weighting factor applied to the luma prediction
1303  * @param luma_offset additive offset applied to the luma prediction value
1304  */
1305
1306 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1307                         AVFrame *ref, const Mv *mv, int x_off, int y_off,
1308                         int block_w, int block_h, int luma_weight, int luma_offset)
1309 {
1310     HEVCLocalContext *lc = s->HEVClc;
1311     uint8_t *src         = ref->data[0];
1312     ptrdiff_t srcstride  = ref->linesize[0];
1313     int pic_width        = s->sps->width;
1314     int pic_height       = s->sps->height;
1315     int mx               = mv->x & 3;
1316     int my               = mv->y & 3;
1317     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1318                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1319     int idx              = ff_hevc_pel_weight[block_w];
1320
1321     x_off += mv->x >> 2;
1322     y_off += mv->y >> 2;
1323     src   += y_off * srcstride + x_off * (1 << s->sps->pixel_shift);
1324
1325     if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1326         x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1327         y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1328         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1329         int offset     = QPEL_EXTRA_BEFORE * srcstride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1330         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1331
1332         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1333                                  edge_emu_stride, srcstride,
1334                                  block_w + QPEL_EXTRA,
1335                                  block_h + QPEL_EXTRA,
1336                                  x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1337                                  pic_width, pic_height);
1338         src = lc->edge_emu_buffer + buf_offset;
1339         srcstride = edge_emu_stride;
1340     }
1341
1342     if (!weight_flag)
1343         s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1344                                                       block_h, mx, my, block_w);
1345     else
1346         s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1347                                                         block_h, s->sh.luma_log2_weight_denom,
1348                                                         luma_weight, luma_offset, mx, my, block_w);
1349 }
1350
1351 /**
1352  * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1353  *
1354  * @param s HEVC decoding context
1355  * @param dst target buffer for block data at block position
1356  * @param dststride stride of the dst buffer
1357  * @param ref0 reference picture0 buffer at origin (0, 0)
1358  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1359  * @param x_off horizontal position of block from origin (0, 0)
1360  * @param y_off vertical position of block from origin (0, 0)
1361  * @param block_w width of block
1362  * @param block_h height of block
1363  * @param ref1 reference picture1 buffer at origin (0, 0)
1364  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1365  * @param current_mv current motion vector structure
1366  */
1367  static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1368                        AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1369                        int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1370 {
1371     HEVCLocalContext *lc = s->HEVClc;
1372     ptrdiff_t src0stride  = ref0->linesize[0];
1373     ptrdiff_t src1stride  = ref1->linesize[0];
1374     int pic_width        = s->sps->width;
1375     int pic_height       = s->sps->height;
1376     int mx0              = mv0->x & 3;
1377     int my0              = mv0->y & 3;
1378     int mx1              = mv1->x & 3;
1379     int my1              = mv1->y & 3;
1380     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1381                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1382     int x_off0           = x_off + (mv0->x >> 2);
1383     int y_off0           = y_off + (mv0->y >> 2);
1384     int x_off1           = x_off + (mv1->x >> 2);
1385     int y_off1           = y_off + (mv1->y >> 2);
1386     int idx              = ff_hevc_pel_weight[block_w];
1387
1388     uint8_t *src0  = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
1389     uint8_t *src1  = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
1390
1391     if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1392         x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1393         y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1394         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1395         int offset     = QPEL_EXTRA_BEFORE * src0stride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1396         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1397
1398         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1399                                  edge_emu_stride, src0stride,
1400                                  block_w + QPEL_EXTRA,
1401                                  block_h + QPEL_EXTRA,
1402                                  x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1403                                  pic_width, pic_height);
1404         src0 = lc->edge_emu_buffer + buf_offset;
1405         src0stride = edge_emu_stride;
1406     }
1407
1408     if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1409         x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1410         y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1411         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1412         int offset     = QPEL_EXTRA_BEFORE * src1stride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1413         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1414
1415         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1416                                  edge_emu_stride, src1stride,
1417                                  block_w + QPEL_EXTRA,
1418                                  block_h + QPEL_EXTRA,
1419                                  x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1420                                  pic_width, pic_height);
1421         src1 = lc->edge_emu_buffer2 + buf_offset;
1422         src1stride = edge_emu_stride;
1423     }
1424
1425     s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
1426                                                 block_h, mx0, my0, block_w);
1427     if (!weight_flag)
1428         s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1429                                                        block_h, mx1, my1, block_w);
1430     else
1431         s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1432                                                          block_h, s->sh.luma_log2_weight_denom,
1433                                                          s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1434                                                          s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1435                                                          s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1436                                                          s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1437                                                          mx1, my1, block_w);
1438
1439 }
1440
1441 /**
1442  * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1443  *
1444  * @param s HEVC decoding context
1445  * @param dst1 target buffer for block data at block position (U plane)
1446  * @param dst2 target buffer for block data at block position (V plane)
1447  * @param dststride stride of the dst1 and dst2 buffers
1448  * @param ref reference picture buffer at origin (0, 0)
1449  * @param mv motion vector (relative to block position) to get pixel data from
1450  * @param x_off horizontal position of block from origin (0, 0)
1451  * @param y_off vertical position of block from origin (0, 0)
1452  * @param block_w width of block
1453  * @param block_h height of block
1454  * @param chroma_weight weighting factor applied to the chroma prediction
1455  * @param chroma_offset additive offset applied to the chroma prediction value
1456  */
1457
1458 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1459                           ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1460                           int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1461 {
1462     HEVCLocalContext *lc = s->HEVClc;
1463     int pic_width        = s->sps->width >> s->sps->hshift[1];
1464     int pic_height       = s->sps->height >> s->sps->vshift[1];
1465     const Mv *mv         = &current_mv->mv[reflist];
1466     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1467                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1468     int idx              = ff_hevc_pel_weight[block_w];
1469     int hshift           = s->sps->hshift[1];
1470     int vshift           = s->sps->vshift[1];
1471     intptr_t mx          = av_mod_uintp2(mv->x, 2 + hshift);
1472     intptr_t my          = av_mod_uintp2(mv->y, 2 + vshift);
1473     intptr_t _mx         = mx << (1 - hshift);
1474     intptr_t _my         = my << (1 - vshift);
1475
1476     x_off += mv->x >> (2 + hshift);
1477     y_off += mv->y >> (2 + vshift);
1478     src0  += y_off * srcstride + x_off * (1 << s->sps->pixel_shift);
1479
1480     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1481         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1482         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1483         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1484         int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->sps->pixel_shift));
1485         int buf_offset0 = EPEL_EXTRA_BEFORE *
1486                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1487         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1488                                  edge_emu_stride, srcstride,
1489                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1490                                  x_off - EPEL_EXTRA_BEFORE,
1491                                  y_off - EPEL_EXTRA_BEFORE,
1492                                  pic_width, pic_height);
1493
1494         src0 = lc->edge_emu_buffer + buf_offset0;
1495         srcstride = edge_emu_stride;
1496     }
1497     if (!weight_flag)
1498         s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1499                                                   block_h, _mx, _my, block_w);
1500     else
1501         s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1502                                                         block_h, s->sh.chroma_log2_weight_denom,
1503                                                         chroma_weight, chroma_offset, _mx, _my, block_w);
1504 }
1505
1506 /**
1507  * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1508  *
1509  * @param s HEVC decoding context
1510  * @param dst target buffer for block data at block position
1511  * @param dststride stride of the dst buffer
1512  * @param ref0 reference picture0 buffer at origin (0, 0)
1513  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1514  * @param x_off horizontal position of block from origin (0, 0)
1515  * @param y_off vertical position of block from origin (0, 0)
1516  * @param block_w width of block
1517  * @param block_h height of block
1518  * @param ref1 reference picture1 buffer at origin (0, 0)
1519  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1520  * @param current_mv current motion vector structure
1521  * @param cidx chroma component(cb, cr)
1522  */
1523 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1524                          int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1525 {
1526     HEVCLocalContext *lc = s->HEVClc;
1527     uint8_t *src1        = ref0->data[cidx+1];
1528     uint8_t *src2        = ref1->data[cidx+1];
1529     ptrdiff_t src1stride = ref0->linesize[cidx+1];
1530     ptrdiff_t src2stride = ref1->linesize[cidx+1];
1531     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1532                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1533     int pic_width        = s->sps->width >> s->sps->hshift[1];
1534     int pic_height       = s->sps->height >> s->sps->vshift[1];
1535     Mv *mv0              = &current_mv->mv[0];
1536     Mv *mv1              = &current_mv->mv[1];
1537     int hshift = s->sps->hshift[1];
1538     int vshift = s->sps->vshift[1];
1539
1540     intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift);
1541     intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift);
1542     intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift);
1543     intptr_t my1 = av_mod_uintp2(mv1->y, 2 + vshift);
1544     intptr_t _mx0 = mx0 << (1 - hshift);
1545     intptr_t _my0 = my0 << (1 - vshift);
1546     intptr_t _mx1 = mx1 << (1 - hshift);
1547     intptr_t _my1 = my1 << (1 - vshift);
1548
1549     int x_off0 = x_off + (mv0->x >> (2 + hshift));
1550     int y_off0 = y_off + (mv0->y >> (2 + vshift));
1551     int x_off1 = x_off + (mv1->x >> (2 + hshift));
1552     int y_off1 = y_off + (mv1->y >> (2 + vshift));
1553     int idx = ff_hevc_pel_weight[block_w];
1554     src1  += y_off0 * src1stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
1555     src2  += y_off1 * src2stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
1556
1557     if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1558         x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1559         y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1560         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1561         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1562         int buf_offset1 = EPEL_EXTRA_BEFORE *
1563                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1564
1565         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1566                                  edge_emu_stride, src1stride,
1567                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1568                                  x_off0 - EPEL_EXTRA_BEFORE,
1569                                  y_off0 - EPEL_EXTRA_BEFORE,
1570                                  pic_width, pic_height);
1571
1572         src1 = lc->edge_emu_buffer + buf_offset1;
1573         src1stride = edge_emu_stride;
1574     }
1575
1576     if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1577         x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1578         y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1579         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1580         int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1581         int buf_offset1 = EPEL_EXTRA_BEFORE *
1582                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1583
1584         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1585                                  edge_emu_stride, src2stride,
1586                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1587                                  x_off1 - EPEL_EXTRA_BEFORE,
1588                                  y_off1 - EPEL_EXTRA_BEFORE,
1589                                  pic_width, pic_height);
1590
1591         src2 = lc->edge_emu_buffer2 + buf_offset1;
1592         src2stride = edge_emu_stride;
1593     }
1594
1595     s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1596                                                 block_h, _mx0, _my0, block_w);
1597     if (!weight_flag)
1598         s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1599                                                        src2, src2stride, lc->tmp,
1600                                                        block_h, _mx1, _my1, block_w);
1601     else
1602         s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1603                                                          src2, src2stride, lc->tmp,
1604                                                          block_h,
1605                                                          s->sh.chroma_log2_weight_denom,
1606                                                          s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1607                                                          s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1608                                                          s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1609                                                          s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1610                                                          _mx1, _my1, block_w);
1611 }
1612
1613 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1614                                 const Mv *mv, int y0, int height)
1615 {
1616     int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1617
1618     if (s->threads_type == FF_THREAD_FRAME )
1619         ff_thread_await_progress(&ref->tf, y, 0);
1620 }
1621
1622 static void hevc_luma_mv_mpv_mode(HEVCContext *s, int x0, int y0, int nPbW,
1623                                   int nPbH, int log2_cb_size, int part_idx,
1624                                   int merge_idx, MvField *mv)
1625 {
1626     HEVCLocalContext *lc = s->HEVClc;
1627     enum InterPredIdc inter_pred_idc = PRED_L0;
1628     int mvp_flag;
1629
1630     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1631     mv->pred_flag = 0;
1632     if (s->sh.slice_type == B_SLICE)
1633         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1634
1635     if (inter_pred_idc != PRED_L1) {
1636         if (s->sh.nb_refs[L0])
1637             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1638
1639         mv->pred_flag = PF_L0;
1640         ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1641         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1642         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1643                                  part_idx, merge_idx, mv, mvp_flag, 0);
1644         mv->mv[0].x += lc->pu.mvd.x;
1645         mv->mv[0].y += lc->pu.mvd.y;
1646     }
1647
1648     if (inter_pred_idc != PRED_L0) {
1649         if (s->sh.nb_refs[L1])
1650             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1651
1652         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1653             AV_ZERO32(&lc->pu.mvd);
1654         } else {
1655             ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1656         }
1657
1658         mv->pred_flag += PF_L1;
1659         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1660         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1661                                  part_idx, merge_idx, mv, mvp_flag, 1);
1662         mv->mv[1].x += lc->pu.mvd.x;
1663         mv->mv[1].y += lc->pu.mvd.y;
1664     }
1665 }
1666
1667 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1668                                 int nPbW, int nPbH,
1669                                 int log2_cb_size, int partIdx, int idx)
1670 {
1671 #define POS(c_idx, x, y)                                                              \
1672     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1673                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1674     HEVCLocalContext *lc = s->HEVClc;
1675     int merge_idx = 0;
1676     struct MvField current_mv = {{{ 0 }}};
1677
1678     int min_pu_width = s->sps->min_pu_width;
1679
1680     MvField *tab_mvf = s->ref->tab_mvf;
1681     RefPicList  *refPicList = s->ref->refPicList;
1682     HEVCFrame *ref0 = NULL, *ref1 = NULL;
1683     uint8_t *dst0 = POS(0, x0, y0);
1684     uint8_t *dst1 = POS(1, x0, y0);
1685     uint8_t *dst2 = POS(2, x0, y0);
1686     int log2_min_cb_size = s->sps->log2_min_cb_size;
1687     int min_cb_width     = s->sps->min_cb_width;
1688     int x_cb             = x0 >> log2_min_cb_size;
1689     int y_cb             = y0 >> log2_min_cb_size;
1690     int x_pu, y_pu;
1691     int i, j;
1692
1693     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1694
1695     if (!skip_flag)
1696         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1697
1698     if (skip_flag || lc->pu.merge_flag) {
1699         if (s->sh.max_num_merge_cand > 1)
1700             merge_idx = ff_hevc_merge_idx_decode(s);
1701         else
1702             merge_idx = 0;
1703
1704         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1705                                    partIdx, merge_idx, &current_mv);
1706     } else {
1707         hevc_luma_mv_mpv_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1708                               partIdx, merge_idx, &current_mv);
1709     }
1710
1711     x_pu = x0 >> s->sps->log2_min_pu_size;
1712     y_pu = y0 >> s->sps->log2_min_pu_size;
1713
1714     for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1715         for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1716             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1717
1718     if (current_mv.pred_flag & PF_L0) {
1719         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1720         if (!ref0)
1721             return;
1722         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1723     }
1724     if (current_mv.pred_flag & PF_L1) {
1725         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1726         if (!ref1)
1727             return;
1728         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1729     }
1730
1731     if (current_mv.pred_flag == PF_L0) {
1732         int x0_c = x0 >> s->sps->hshift[1];
1733         int y0_c = y0 >> s->sps->vshift[1];
1734         int nPbW_c = nPbW >> s->sps->hshift[1];
1735         int nPbH_c = nPbH >> s->sps->vshift[1];
1736
1737         luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1738                     &current_mv.mv[0], x0, y0, nPbW, nPbH,
1739                     s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1740                     s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1741
1742         if (s->sps->chroma_format_idc) {
1743             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1744                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1745                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1746             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1747                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1748                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1749         }
1750     } else if (current_mv.pred_flag == PF_L1) {
1751         int x0_c = x0 >> s->sps->hshift[1];
1752         int y0_c = y0 >> s->sps->vshift[1];
1753         int nPbW_c = nPbW >> s->sps->hshift[1];
1754         int nPbH_c = nPbH >> s->sps->vshift[1];
1755
1756         luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1757                     &current_mv.mv[1], x0, y0, nPbW, nPbH,
1758                     s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1759                     s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1760
1761         if (s->sps->chroma_format_idc) {
1762             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1763                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1764                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1765
1766             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1767                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1768                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1769         }
1770     } else if (current_mv.pred_flag == PF_BI) {
1771         int x0_c = x0 >> s->sps->hshift[1];
1772         int y0_c = y0 >> s->sps->vshift[1];
1773         int nPbW_c = nPbW >> s->sps->hshift[1];
1774         int nPbH_c = nPbH >> s->sps->vshift[1];
1775
1776         luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1777                    &current_mv.mv[0], x0, y0, nPbW, nPbH,
1778                    ref1->frame, &current_mv.mv[1], &current_mv);
1779
1780         if (s->sps->chroma_format_idc) {
1781             chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1782                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0);
1783
1784             chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1785                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 1);
1786         }
1787     }
1788 }
1789
1790 /**
1791  * 8.4.1
1792  */
1793 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1794                                 int prev_intra_luma_pred_flag)
1795 {
1796     HEVCLocalContext *lc = s->HEVClc;
1797     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1798     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1799     int min_pu_width     = s->sps->min_pu_width;
1800     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1801     int x0b              = av_mod_uintp2(x0, s->sps->log2_ctb_size);
1802     int y0b              = av_mod_uintp2(y0, s->sps->log2_ctb_size);
1803
1804     int cand_up   = (lc->ctb_up_flag || y0b) ?
1805                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1806     int cand_left = (lc->ctb_left_flag || x0b) ?
1807                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1808
1809     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1810
1811     MvField *tab_mvf = s->ref->tab_mvf;
1812     int intra_pred_mode;
1813     int candidate[3];
1814     int i, j;
1815
1816     // intra_pred_mode prediction does not cross vertical CTB boundaries
1817     if ((y0 - 1) < y_ctb)
1818         cand_up = INTRA_DC;
1819
1820     if (cand_left == cand_up) {
1821         if (cand_left < 2) {
1822             candidate[0] = INTRA_PLANAR;
1823             candidate[1] = INTRA_DC;
1824             candidate[2] = INTRA_ANGULAR_26;
1825         } else {
1826             candidate[0] = cand_left;
1827             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1828             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1829         }
1830     } else {
1831         candidate[0] = cand_left;
1832         candidate[1] = cand_up;
1833         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1834             candidate[2] = INTRA_PLANAR;
1835         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1836             candidate[2] = INTRA_DC;
1837         } else {
1838             candidate[2] = INTRA_ANGULAR_26;
1839         }
1840     }
1841
1842     if (prev_intra_luma_pred_flag) {
1843         intra_pred_mode = candidate[lc->pu.mpm_idx];
1844     } else {
1845         if (candidate[0] > candidate[1])
1846             FFSWAP(uint8_t, candidate[0], candidate[1]);
1847         if (candidate[0] > candidate[2])
1848             FFSWAP(uint8_t, candidate[0], candidate[2]);
1849         if (candidate[1] > candidate[2])
1850             FFSWAP(uint8_t, candidate[1], candidate[2]);
1851
1852         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1853         for (i = 0; i < 3; i++)
1854             if (intra_pred_mode >= candidate[i])
1855                 intra_pred_mode++;
1856     }
1857
1858     /* write the intra prediction units into the mv array */
1859     if (!size_in_pus)
1860         size_in_pus = 1;
1861     for (i = 0; i < size_in_pus; i++) {
1862         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1863                intra_pred_mode, size_in_pus);
1864
1865         for (j = 0; j < size_in_pus; j++) {
1866             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
1867         }
1868     }
1869
1870     return intra_pred_mode;
1871 }
1872
1873 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1874                                           int log2_cb_size, int ct_depth)
1875 {
1876     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1877     int x_cb   = x0 >> s->sps->log2_min_cb_size;
1878     int y_cb   = y0 >> s->sps->log2_min_cb_size;
1879     int y;
1880
1881     for (y = 0; y < length; y++)
1882         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
1883                ct_depth, length);
1884 }
1885
1886 static const uint8_t tab_mode_idx[] = {
1887      0,  1,  2,  2,  2,  2,  3,  5,  7,  8, 10, 12, 13, 15, 17, 18, 19, 20,
1888     21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31};
1889
1890 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1891                                   int log2_cb_size)
1892 {
1893     HEVCLocalContext *lc = s->HEVClc;
1894     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1895     uint8_t prev_intra_luma_pred_flag[4];
1896     int split   = lc->cu.part_mode == PART_NxN;
1897     int pb_size = (1 << log2_cb_size) >> split;
1898     int side    = split + 1;
1899     int chroma_mode;
1900     int i, j;
1901
1902     for (i = 0; i < side; i++)
1903         for (j = 0; j < side; j++)
1904             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1905
1906     for (i = 0; i < side; i++) {
1907         for (j = 0; j < side; j++) {
1908             if (prev_intra_luma_pred_flag[2 * i + j])
1909                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1910             else
1911                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1912
1913             lc->pu.intra_pred_mode[2 * i + j] =
1914                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1915                                      prev_intra_luma_pred_flag[2 * i + j]);
1916         }
1917     }
1918
1919     if (s->sps->chroma_format_idc == 3) {
1920         for (i = 0; i < side; i++) {
1921             for (j = 0; j < side; j++) {
1922                 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1923                 if (chroma_mode != 4) {
1924                     if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
1925                         lc->pu.intra_pred_mode_c[2 * i + j] = 34;
1926                     else
1927                         lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
1928                 } else {
1929                     lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
1930                 }
1931             }
1932         }
1933     } else if (s->sps->chroma_format_idc == 2) {
1934         int mode_idx;
1935         lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1936         if (chroma_mode != 4) {
1937             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1938                 mode_idx = 34;
1939             else
1940                 mode_idx = intra_chroma_table[chroma_mode];
1941         } else {
1942             mode_idx = lc->pu.intra_pred_mode[0];
1943         }
1944         lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
1945     } else if (s->sps->chroma_format_idc != 0) {
1946         chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1947         if (chroma_mode != 4) {
1948             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1949                 lc->pu.intra_pred_mode_c[0] = 34;
1950             else
1951                 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
1952         } else {
1953             lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
1954         }
1955     }
1956 }
1957
1958 static void intra_prediction_unit_default_value(HEVCContext *s,
1959                                                 int x0, int y0,
1960                                                 int log2_cb_size)
1961 {
1962     HEVCLocalContext *lc = s->HEVClc;
1963     int pb_size          = 1 << log2_cb_size;
1964     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
1965     int min_pu_width     = s->sps->min_pu_width;
1966     MvField *tab_mvf     = s->ref->tab_mvf;
1967     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1968     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1969     int j, k;
1970
1971     if (size_in_pus == 0)
1972         size_in_pus = 1;
1973     for (j = 0; j < size_in_pus; j++)
1974         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
1975     if (lc->cu.pred_mode == MODE_INTRA)
1976         for (j = 0; j < size_in_pus; j++)
1977             for (k = 0; k < size_in_pus; k++)
1978                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
1979 }
1980
1981 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
1982 {
1983     int cb_size          = 1 << log2_cb_size;
1984     HEVCLocalContext *lc = s->HEVClc;
1985     int log2_min_cb_size = s->sps->log2_min_cb_size;
1986     int length           = cb_size >> log2_min_cb_size;
1987     int min_cb_width     = s->sps->min_cb_width;
1988     int x_cb             = x0 >> log2_min_cb_size;
1989     int y_cb             = y0 >> log2_min_cb_size;
1990     int idx              = log2_cb_size - 2;
1991     int qp_block_mask    = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
1992     int x, y, ret;
1993
1994     lc->cu.x                = x0;
1995     lc->cu.y                = y0;
1996     lc->cu.pred_mode        = MODE_INTRA;
1997     lc->cu.part_mode        = PART_2Nx2N;
1998     lc->cu.intra_split_flag = 0;
1999
2000     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2001     for (x = 0; x < 4; x++)
2002         lc->pu.intra_pred_mode[x] = 1;
2003     if (s->pps->transquant_bypass_enable_flag) {
2004         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2005         if (lc->cu.cu_transquant_bypass_flag)
2006             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2007     } else
2008         lc->cu.cu_transquant_bypass_flag = 0;
2009
2010     if (s->sh.slice_type != I_SLICE) {
2011         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2012
2013         x = y_cb * min_cb_width + x_cb;
2014         for (y = 0; y < length; y++) {
2015             memset(&s->skip_flag[x], skip_flag, length);
2016             x += min_cb_width;
2017         }
2018         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2019     } else {
2020         x = y_cb * min_cb_width + x_cb;
2021         for (y = 0; y < length; y++) {
2022             memset(&s->skip_flag[x], 0, length);
2023             x += min_cb_width;
2024         }
2025     }
2026
2027     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2028         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2029         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2030
2031         if (!s->sh.disable_deblocking_filter_flag)
2032             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2033     } else {
2034         int pcm_flag = 0;
2035
2036         if (s->sh.slice_type != I_SLICE)
2037             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2038         if (lc->cu.pred_mode != MODE_INTRA ||
2039             log2_cb_size == s->sps->log2_min_cb_size) {
2040             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2041             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2042                                       lc->cu.pred_mode == MODE_INTRA;
2043         }
2044
2045         if (lc->cu.pred_mode == MODE_INTRA) {
2046             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2047                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2048                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2049                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2050             }
2051             if (pcm_flag) {
2052                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2053                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2054                 if (s->sps->pcm.loop_filter_disable_flag)
2055                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2056
2057                 if (ret < 0)
2058                     return ret;
2059             } else {
2060                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2061             }
2062         } else {
2063             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2064             switch (lc->cu.part_mode) {
2065             case PART_2Nx2N:
2066                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2067                 break;
2068             case PART_2NxN:
2069                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0, idx);
2070                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2071                 break;
2072             case PART_Nx2N:
2073                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2074                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2075                 break;
2076             case PART_2NxnU:
2077                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0, idx);
2078                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2079                 break;
2080             case PART_2NxnD:
2081                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2082                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1, idx);
2083                 break;
2084             case PART_nLx2N:
2085                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0, idx - 2);
2086                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2087                 break;
2088             case PART_nRx2N:
2089                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2090                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1, idx - 2);
2091                 break;
2092             case PART_NxN:
2093                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2094                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2095                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2096                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
2097                 break;
2098             }
2099         }
2100
2101         if (!pcm_flag) {
2102             int rqt_root_cbf = 1;
2103
2104             if (lc->cu.pred_mode != MODE_INTRA &&
2105                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2106                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2107             }
2108             if (rqt_root_cbf) {
2109                 const static int cbf[2] = { 0 };
2110                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2111                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2112                                          s->sps->max_transform_hierarchy_depth_inter;
2113                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2114                                          log2_cb_size,
2115                                          log2_cb_size, 0, 0, cbf, cbf);
2116                 if (ret < 0)
2117                     return ret;
2118             } else {
2119                 if (!s->sh.disable_deblocking_filter_flag)
2120                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2121             }
2122         }
2123     }
2124
2125     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2126         ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
2127
2128     x = y_cb * min_cb_width + x_cb;
2129     for (y = 0; y < length; y++) {
2130         memset(&s->qp_y_tab[x], lc->qp_y, length);
2131         x += min_cb_width;
2132     }
2133
2134     if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2135        ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2136         lc->qPy_pred = lc->qp_y;
2137     }
2138
2139     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
2140
2141     return 0;
2142 }
2143
2144 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2145                                int log2_cb_size, int cb_depth)
2146 {
2147     HEVCLocalContext *lc = s->HEVClc;
2148     const int cb_size    = 1 << log2_cb_size;
2149     int ret;
2150     int split_cu;
2151
2152     lc->ct_depth = cb_depth;
2153     if (x0 + cb_size <= s->sps->width  &&
2154         y0 + cb_size <= s->sps->height &&
2155         log2_cb_size > s->sps->log2_min_cb_size) {
2156         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2157     } else {
2158         split_cu = (log2_cb_size > s->sps->log2_min_cb_size);
2159     }
2160     if (s->pps->cu_qp_delta_enabled_flag &&
2161         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2162         lc->tu.is_cu_qp_delta_coded = 0;
2163         lc->tu.cu_qp_delta          = 0;
2164     }
2165
2166     if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2167         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_chroma_qp_offset_depth) {
2168         lc->tu.is_cu_chroma_qp_offset_coded = 0;
2169     }
2170
2171     if (split_cu) {
2172         int qp_block_mask = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
2173         const int cb_size_split = cb_size >> 1;
2174         const int x1 = x0 + cb_size_split;
2175         const int y1 = y0 + cb_size_split;
2176
2177         int more_data = 0;
2178
2179         more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2180         if (more_data < 0)
2181             return more_data;
2182
2183         if (more_data && x1 < s->sps->width) {
2184             more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2185             if (more_data < 0)
2186                 return more_data;
2187         }
2188         if (more_data && y1 < s->sps->height) {
2189             more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2190             if (more_data < 0)
2191                 return more_data;
2192         }
2193         if (more_data && x1 < s->sps->width &&
2194             y1 < s->sps->height) {
2195             more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
2196             if (more_data < 0)
2197                 return more_data;
2198         }
2199
2200         if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2201             ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2202             lc->qPy_pred = lc->qp_y;
2203
2204         if (more_data)
2205             return ((x1 + cb_size_split) < s->sps->width ||
2206                     (y1 + cb_size_split) < s->sps->height);
2207         else
2208             return 0;
2209     } else {
2210         ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2211         if (ret < 0)
2212             return ret;
2213         if ((!((x0 + cb_size) %
2214                (1 << (s->sps->log2_ctb_size))) ||
2215              (x0 + cb_size >= s->sps->width)) &&
2216             (!((y0 + cb_size) %
2217                (1 << (s->sps->log2_ctb_size))) ||
2218              (y0 + cb_size >= s->sps->height))) {
2219             int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2220             return !end_of_slice_flag;
2221         } else {
2222             return 1;
2223         }
2224     }
2225
2226     return 0;
2227 }
2228
2229 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2230                                  int ctb_addr_ts)
2231 {
2232     HEVCLocalContext *lc  = s->HEVClc;
2233     int ctb_size          = 1 << s->sps->log2_ctb_size;
2234     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2235     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2236
2237     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2238
2239     if (s->pps->entropy_coding_sync_enabled_flag) {
2240         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2241             lc->first_qp_group = 1;
2242         lc->end_of_tiles_x = s->sps->width;
2243     } else if (s->pps->tiles_enabled_flag) {
2244         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2245             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2246             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2247             lc->first_qp_group   = 1;
2248         }
2249     } else {
2250         lc->end_of_tiles_x = s->sps->width;
2251     }
2252
2253     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2254
2255     lc->boundary_flags = 0;
2256     if (s->pps->tiles_enabled_flag) {
2257         if (x_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2258             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2259         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2260             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2261         if (y_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]])
2262             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2263         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width])
2264             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2265     } else {
2266         if (ctb_addr_in_slice <= 0)
2267             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2268         if (ctb_addr_in_slice < s->sps->ctb_width)
2269             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2270     }
2271
2272     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2273     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2274     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2275     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2276 }
2277
2278 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2279 {
2280     HEVCContext *s  = avctxt->priv_data;
2281     int ctb_size    = 1 << s->sps->log2_ctb_size;
2282     int more_data   = 1;
2283     int x_ctb       = 0;
2284     int y_ctb       = 0;
2285     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2286
2287     if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2288         av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2289         return AVERROR_INVALIDDATA;
2290     }
2291
2292     if (s->sh.dependent_slice_segment_flag) {
2293         int prev_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2294         if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2295             av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2296             return AVERROR_INVALIDDATA;
2297         }
2298     }
2299
2300     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2301         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2302
2303         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2304         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2305         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2306
2307         ff_hevc_cabac_init(s, ctb_addr_ts);
2308
2309         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2310
2311         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2312         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2313         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2314
2315         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2316         if (more_data < 0) {
2317             s->tab_slice_address[ctb_addr_rs] = -1;
2318             return more_data;
2319         }
2320
2321
2322         ctb_addr_ts++;
2323         ff_hevc_save_states(s, ctb_addr_ts);
2324         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2325     }
2326
2327     if (x_ctb + ctb_size >= s->sps->width &&
2328         y_ctb + ctb_size >= s->sps->height)
2329         ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2330
2331     return ctb_addr_ts;
2332 }
2333
2334 static int hls_slice_data(HEVCContext *s)
2335 {
2336     int arg[2];
2337     int ret[2];
2338
2339     arg[0] = 0;
2340     arg[1] = 1;
2341
2342     s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
2343     return ret[0];
2344 }
2345 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2346 {
2347     HEVCContext *s1  = avctxt->priv_data, *s;
2348     HEVCLocalContext *lc;
2349     int ctb_size    = 1<< s1->sps->log2_ctb_size;
2350     int more_data   = 1;
2351     int *ctb_row_p    = input_ctb_row;
2352     int ctb_row = ctb_row_p[job];
2353     int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->sps->width + ctb_size - 1) >> s1->sps->log2_ctb_size);
2354     int ctb_addr_ts = s1->pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2355     int thread = ctb_row % s1->threads_number;
2356     int ret;
2357
2358     s = s1->sList[self_id];
2359     lc = s->HEVClc;
2360
2361     if(ctb_row) {
2362         ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2363
2364         if (ret < 0)
2365             return ret;
2366         ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2367     }
2368
2369     while(more_data && ctb_addr_ts < s->sps->ctb_size) {
2370         int x_ctb = (ctb_addr_rs % s->sps->ctb_width) << s->sps->log2_ctb_size;
2371         int y_ctb = (ctb_addr_rs / s->sps->ctb_width) << s->sps->log2_ctb_size;
2372
2373         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2374
2375         ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
2376
2377         if (avpriv_atomic_int_get(&s1->wpp_err)){
2378             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2379             return 0;
2380         }
2381
2382         ff_hevc_cabac_init(s, ctb_addr_ts);
2383         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2384         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2385
2386         if (more_data < 0) {
2387             s->tab_slice_address[ctb_addr_rs] = -1;
2388             return more_data;
2389         }
2390
2391         ctb_addr_ts++;
2392
2393         ff_hevc_save_states(s, ctb_addr_ts);
2394         ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2395         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2396
2397         if (!more_data && (x_ctb+ctb_size) < s->sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2398             avpriv_atomic_int_set(&s1->wpp_err,  1);
2399             ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2400             return 0;
2401         }
2402
2403         if ((x_ctb+ctb_size) >= s->sps->width && (y_ctb+ctb_size) >= s->sps->height ) {
2404             ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2405             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2406             return ctb_addr_ts;
2407         }
2408         ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2409         x_ctb+=ctb_size;
2410
2411         if(x_ctb >= s->sps->width) {
2412             break;
2413         }
2414     }
2415     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2416
2417     return 0;
2418 }
2419
2420 static int hls_slice_data_wpp(HEVCContext *s, const uint8_t *nal, int length)
2421 {
2422     HEVCLocalContext *lc = s->HEVClc;
2423     int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2424     int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2425     int offset;
2426     int startheader, cmpt = 0;
2427     int i, j, res = 0;
2428
2429     if (!ret || !arg) {
2430         av_free(ret);
2431         av_free(arg);
2432         return AVERROR(ENOMEM);
2433     }
2434
2435
2436     if (!s->sList[1]) {
2437         ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
2438
2439
2440         for (i = 1; i < s->threads_number; i++) {
2441             s->sList[i] = av_malloc(sizeof(HEVCContext));
2442             memcpy(s->sList[i], s, sizeof(HEVCContext));
2443             s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2444             s->sList[i]->HEVClc = s->HEVClcList[i];
2445         }
2446     }
2447
2448     offset = (lc->gb.index >> 3);
2449
2450     for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < s->skipped_bytes; j++) {
2451         if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2452             startheader--;
2453             cmpt++;
2454         }
2455     }
2456
2457     for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2458         offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2459         for (j = 0, cmpt = 0, startheader = offset
2460              + s->sh.entry_point_offset[i]; j < s->skipped_bytes; j++) {
2461             if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2462                 startheader--;
2463                 cmpt++;
2464             }
2465         }
2466         s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2467         s->sh.offset[i - 1] = offset;
2468
2469     }
2470     if (s->sh.num_entry_point_offsets != 0) {
2471         offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2472         s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2473         s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
2474
2475     }
2476     s->data = nal;
2477
2478     for (i = 1; i < s->threads_number; i++) {
2479         s->sList[i]->HEVClc->first_qp_group = 1;
2480         s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2481         memcpy(s->sList[i], s, sizeof(HEVCContext));
2482         s->sList[i]->HEVClc = s->HEVClcList[i];
2483     }
2484
2485     avpriv_atomic_int_set(&s->wpp_err, 0);
2486     ff_reset_entries(s->avctx);
2487
2488     for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2489         arg[i] = i;
2490         ret[i] = 0;
2491     }
2492
2493     if (s->pps->entropy_coding_sync_enabled_flag)
2494         s->avctx->execute2(s->avctx, (void *) hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2495
2496     for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
2497         res += ret[i];
2498     av_free(ret);
2499     av_free(arg);
2500     return res;
2501 }
2502
2503 /**
2504  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2505  * 0 if the unit should be skipped, 1 otherwise
2506  */
2507 static int hls_nal_unit(HEVCContext *s)
2508 {
2509     GetBitContext *gb = &s->HEVClc->gb;
2510     int nuh_layer_id;
2511
2512     if (get_bits1(gb) != 0)
2513         return AVERROR_INVALIDDATA;
2514
2515     s->nal_unit_type = get_bits(gb, 6);
2516
2517     nuh_layer_id   = get_bits(gb, 6);
2518     s->temporal_id = get_bits(gb, 3) - 1;
2519     if (s->temporal_id < 0)
2520         return AVERROR_INVALIDDATA;
2521
2522     av_log(s->avctx, AV_LOG_DEBUG,
2523            "nal_unit_type: %d, nuh_layer_id: %d, temporal_id: %d\n",
2524            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2525
2526     return nuh_layer_id == 0;
2527 }
2528
2529 static int set_side_data(HEVCContext *s)
2530 {
2531     AVFrame *out = s->ref->frame;
2532
2533     if (s->sei_frame_packing_present &&
2534         s->frame_packing_arrangement_type >= 3 &&
2535         s->frame_packing_arrangement_type <= 5 &&
2536         s->content_interpretation_type > 0 &&
2537         s->content_interpretation_type < 3) {
2538         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2539         if (!stereo)
2540             return AVERROR(ENOMEM);
2541
2542         switch (s->frame_packing_arrangement_type) {
2543         case 3:
2544             if (s->quincunx_subsampling)
2545                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2546             else
2547                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2548             break;
2549         case 4:
2550             stereo->type = AV_STEREO3D_TOPBOTTOM;
2551             break;
2552         case 5:
2553             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2554             break;
2555         }
2556
2557         if (s->content_interpretation_type == 2)
2558             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2559     }
2560
2561     if (s->sei_display_orientation_present &&
2562         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2563         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2564         AVFrameSideData *rotation = av_frame_new_side_data(out,
2565                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2566                                                            sizeof(int32_t) * 9);
2567         if (!rotation)
2568             return AVERROR(ENOMEM);
2569
2570         av_display_rotation_set((int32_t *)rotation->data, angle);
2571         av_display_matrix_flip((int32_t *)rotation->data,
2572                                s->sei_hflip, s->sei_vflip);
2573     }
2574
2575     return 0;
2576 }
2577
2578 static int hevc_frame_start(HEVCContext *s)
2579 {
2580     HEVCLocalContext *lc = s->HEVClc;
2581     int pic_size_in_ctb  = ((s->sps->width  >> s->sps->log2_min_cb_size) + 1) *
2582                            ((s->sps->height >> s->sps->log2_min_cb_size) + 1);
2583     int ret;
2584
2585     memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2586     memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
2587     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2588     memset(s->is_pcm,        0, (s->sps->min_pu_width + 1) * (s->sps->min_pu_height + 1));
2589     memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2590
2591     s->is_decoded        = 0;
2592     s->first_nal_type    = s->nal_unit_type;
2593
2594     if (s->pps->tiles_enabled_flag)
2595         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2596
2597     ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2598     if (ret < 0)
2599         goto fail;
2600
2601     ret = ff_hevc_frame_rps(s);
2602     if (ret < 0) {
2603         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2604         goto fail;
2605     }
2606
2607     s->ref->frame->key_frame = IS_IRAP(s);
2608
2609     ret = set_side_data(s);
2610     if (ret < 0)
2611         goto fail;
2612
2613     s->frame->pict_type = 3 - s->sh.slice_type;
2614
2615     if (!IS_IRAP(s))
2616         ff_hevc_bump_frame(s);
2617
2618     av_frame_unref(s->output_frame);
2619     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2620     if (ret < 0)
2621         goto fail;
2622
2623     if (!s->avctx->hwaccel)
2624         ff_thread_finish_setup(s->avctx);
2625
2626     return 0;
2627
2628 fail:
2629     if (s->ref)
2630         ff_hevc_unref_frame(s, s->ref, ~0);
2631     s->ref = NULL;
2632     return ret;
2633 }
2634
2635 static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
2636 {
2637     HEVCLocalContext *lc = s->HEVClc;
2638     GetBitContext *gb    = &lc->gb;
2639     int ctb_addr_ts, ret;
2640
2641     ret = init_get_bits8(gb, nal->data, nal->size);
2642     if (ret < 0)
2643         return ret;
2644
2645     ret = hls_nal_unit(s);
2646     if (ret < 0) {
2647         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2648                s->nal_unit_type);
2649         goto fail;
2650     } else if (!ret)
2651         return 0;
2652
2653     switch (s->nal_unit_type) {
2654     case NAL_VPS:
2655         ret = ff_hevc_decode_nal_vps(s);
2656         if (ret < 0)
2657             goto fail;
2658         break;
2659     case NAL_SPS:
2660         ret = ff_hevc_decode_nal_sps(s);
2661         if (ret < 0)
2662             goto fail;
2663         break;
2664     case NAL_PPS:
2665         ret = ff_hevc_decode_nal_pps(s);
2666         if (ret < 0)
2667             goto fail;
2668         break;
2669     case NAL_SEI_PREFIX:
2670     case NAL_SEI_SUFFIX:
2671         ret = ff_hevc_decode_nal_sei(s);
2672         if (ret < 0)
2673             goto fail;
2674         break;
2675     case NAL_TRAIL_R:
2676     case NAL_TRAIL_N:
2677     case NAL_TSA_N:
2678     case NAL_TSA_R:
2679     case NAL_STSA_N:
2680     case NAL_STSA_R:
2681     case NAL_BLA_W_LP:
2682     case NAL_BLA_W_RADL:
2683     case NAL_BLA_N_LP:
2684     case NAL_IDR_W_RADL:
2685     case NAL_IDR_N_LP:
2686     case NAL_CRA_NUT:
2687     case NAL_RADL_N:
2688     case NAL_RADL_R:
2689     case NAL_RASL_N:
2690     case NAL_RASL_R:
2691         ret = hls_slice_header(s);
2692         if (ret < 0)
2693             return ret;
2694
2695         if (s->max_ra == INT_MAX) {
2696             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2697                 s->max_ra = s->poc;
2698             } else {
2699                 if (IS_IDR(s))
2700                     s->max_ra = INT_MIN;
2701             }
2702         }
2703
2704         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2705             s->poc <= s->max_ra) {
2706             s->is_decoded = 0;
2707             break;
2708         } else {
2709             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2710                 s->max_ra = INT_MIN;
2711         }
2712
2713         if (s->sh.first_slice_in_pic_flag) {
2714             ret = hevc_frame_start(s);
2715             if (ret < 0)
2716                 return ret;
2717         } else if (!s->ref) {
2718             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2719             goto fail;
2720         }
2721
2722         if (s->nal_unit_type != s->first_nal_type) {
2723             av_log(s->avctx, AV_LOG_ERROR,
2724                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2725                    s->first_nal_type, s->nal_unit_type);
2726             return AVERROR_INVALIDDATA;
2727         }
2728
2729         if (!s->sh.dependent_slice_segment_flag &&
2730             s->sh.slice_type != I_SLICE) {
2731             ret = ff_hevc_slice_rpl(s);
2732             if (ret < 0) {
2733                 av_log(s->avctx, AV_LOG_WARNING,
2734                        "Error constructing the reference lists for the current slice.\n");
2735                 goto fail;
2736             }
2737         }
2738
2739         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2740             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2741             if (ret < 0)
2742                 goto fail;
2743         }
2744
2745         if (s->avctx->hwaccel) {
2746             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
2747             if (ret < 0)
2748                 goto fail;
2749         } else {
2750             if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2751                 ctb_addr_ts = hls_slice_data_wpp(s, nal->data, nal->size);
2752             else
2753                 ctb_addr_ts = hls_slice_data(s);
2754             if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2755                 s->is_decoded = 1;
2756             }
2757
2758             if (ctb_addr_ts < 0) {
2759                 ret = ctb_addr_ts;
2760                 goto fail;
2761             }
2762         }
2763         break;
2764     case NAL_EOS_NUT:
2765     case NAL_EOB_NUT:
2766         s->seq_decode = (s->seq_decode + 1) & 0xff;
2767         s->max_ra     = INT_MAX;
2768         break;
2769     case NAL_AUD:
2770     case NAL_FD_NUT:
2771         break;
2772     default:
2773         av_log(s->avctx, AV_LOG_INFO,
2774                "Skipping NAL unit %d\n", s->nal_unit_type);
2775     }
2776
2777     return 0;
2778 fail:
2779     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2780         return ret;
2781     return 0;
2782 }
2783
2784 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2785  * between these functions would be nice. */
2786 int ff_hevc_extract_rbsp(HEVCContext *s, const uint8_t *src, int length,
2787                          HEVCNAL *nal)
2788 {
2789     int i, si, di;
2790     uint8_t *dst;
2791
2792     s->skipped_bytes = 0;
2793 #define STARTCODE_TEST                                                  \
2794         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
2795             if (src[i + 2] != 3) {                                      \
2796                 /* startcode, so we must be past the end */             \
2797                 length = i;                                             \
2798             }                                                           \
2799             break;                                                      \
2800         }
2801 #if HAVE_FAST_UNALIGNED
2802 #define FIND_FIRST_ZERO                                                 \
2803         if (i > 0 && !src[i])                                           \
2804             i--;                                                        \
2805         while (src[i])                                                  \
2806             i++
2807 #if HAVE_FAST_64BIT
2808     for (i = 0; i + 1 < length; i += 9) {
2809         if (!((~AV_RN64A(src + i) &
2810                (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2811               0x8000800080008080ULL))
2812             continue;
2813         FIND_FIRST_ZERO;
2814         STARTCODE_TEST;
2815         i -= 7;
2816     }
2817 #else
2818     for (i = 0; i + 1 < length; i += 5) {
2819         if (!((~AV_RN32A(src + i) &
2820                (AV_RN32A(src + i) - 0x01000101U)) &
2821               0x80008080U))
2822             continue;
2823         FIND_FIRST_ZERO;
2824         STARTCODE_TEST;
2825         i -= 3;
2826     }
2827 #endif /* HAVE_FAST_64BIT */
2828 #else
2829     for (i = 0; i + 1 < length; i += 2) {
2830         if (src[i])
2831             continue;
2832         if (i > 0 && src[i - 1] == 0)
2833             i--;
2834         STARTCODE_TEST;
2835     }
2836 #endif /* HAVE_FAST_UNALIGNED */
2837
2838     if (i >= length - 1) { // no escaped 0
2839         nal->data     =
2840         nal->raw_data = src;
2841         nal->size     =
2842         nal->raw_size = length;
2843         return length;
2844     }
2845
2846     av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2847                    length + FF_INPUT_BUFFER_PADDING_SIZE);
2848     if (!nal->rbsp_buffer)
2849         return AVERROR(ENOMEM);
2850
2851     dst = nal->rbsp_buffer;
2852
2853     memcpy(dst, src, i);
2854     si = di = i;
2855     while (si + 2 < length) {
2856         // remove escapes (very rare 1:2^22)
2857         if (src[si + 2] > 3) {
2858             dst[di++] = src[si++];
2859             dst[di++] = src[si++];
2860         } else if (src[si] == 0 && src[si + 1] == 0) {
2861             if (src[si + 2] == 3) { // escape
2862                 dst[di++] = 0;
2863                 dst[di++] = 0;
2864                 si       += 3;
2865
2866                 s->skipped_bytes++;
2867                 if (s->skipped_bytes_pos_size < s->skipped_bytes) {
2868                     s->skipped_bytes_pos_size *= 2;
2869                     av_reallocp_array(&s->skipped_bytes_pos,
2870                             s->skipped_bytes_pos_size,
2871                             sizeof(*s->skipped_bytes_pos));
2872                     if (!s->skipped_bytes_pos)
2873                         return AVERROR(ENOMEM);
2874                 }
2875                 if (s->skipped_bytes_pos)
2876                     s->skipped_bytes_pos[s->skipped_bytes-1] = di - 1;
2877                 continue;
2878             } else // next start code
2879                 goto nsc;
2880         }
2881
2882         dst[di++] = src[si++];
2883     }
2884     while (si < length)
2885         dst[di++] = src[si++];
2886
2887 nsc:
2888     memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2889
2890     nal->data = dst;
2891     nal->size = di;
2892     nal->raw_data = src;
2893     nal->raw_size = si;
2894     return si;
2895 }
2896
2897 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2898 {
2899     int i, consumed, ret = 0;
2900
2901     s->ref = NULL;
2902     s->last_eos = s->eos;
2903     s->eos = 0;
2904
2905     /* split the input packet into NAL units, so we know the upper bound on the
2906      * number of slices in the frame */
2907     s->nb_nals = 0;
2908     while (length >= 4) {
2909         HEVCNAL *nal;
2910         int extract_length = 0;
2911
2912         if (s->is_nalff) {
2913             int i;
2914             for (i = 0; i < s->nal_length_size; i++)
2915                 extract_length = (extract_length << 8) | buf[i];
2916             buf    += s->nal_length_size;
2917             length -= s->nal_length_size;
2918
2919             if (extract_length > length) {
2920                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2921                 ret = AVERROR_INVALIDDATA;
2922                 goto fail;
2923             }
2924         } else {
2925             /* search start code */
2926             while (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2927                 ++buf;
2928                 --length;
2929                 if (length < 4) {
2930                     av_log(s->avctx, AV_LOG_ERROR, "No start code is found.\n");
2931                     ret = AVERROR_INVALIDDATA;
2932                     goto fail;
2933                 }
2934             }
2935
2936             buf           += 3;
2937             length        -= 3;
2938         }
2939
2940         if (!s->is_nalff)
2941             extract_length = length;
2942
2943         if (s->nals_allocated < s->nb_nals + 1) {
2944             int new_size = s->nals_allocated + 1;
2945             void *tmp = av_realloc_array(s->nals, new_size, sizeof(*s->nals));
2946             ret = AVERROR(ENOMEM);
2947             if (!tmp) {
2948                 goto fail;
2949             }
2950             s->nals = tmp;
2951             memset(s->nals + s->nals_allocated, 0,
2952                    (new_size - s->nals_allocated) * sizeof(*s->nals));
2953
2954             tmp = av_realloc_array(s->skipped_bytes_nal, new_size, sizeof(*s->skipped_bytes_nal));
2955             if (!tmp)
2956                 goto fail;
2957             s->skipped_bytes_nal = tmp;
2958
2959             tmp = av_realloc_array(s->skipped_bytes_pos_size_nal, new_size, sizeof(*s->skipped_bytes_pos_size_nal));
2960             if (!tmp)
2961                 goto fail;
2962             s->skipped_bytes_pos_size_nal = tmp;
2963
2964             tmp = av_realloc_array(s->skipped_bytes_pos_nal, new_size, sizeof(*s->skipped_bytes_pos_nal));
2965             if (!tmp)
2966                 goto fail;
2967             s->skipped_bytes_pos_nal = tmp;
2968
2969             s->skipped_bytes_pos_size_nal[s->nals_allocated] = 1024; // initial buffer size
2970             s->skipped_bytes_pos_nal[s->nals_allocated] = av_malloc_array(s->skipped_bytes_pos_size_nal[s->nals_allocated], sizeof(*s->skipped_bytes_pos));
2971             if (!s->skipped_bytes_pos_nal[s->nals_allocated])
2972                 goto fail;
2973             s->nals_allocated = new_size;
2974         }
2975         s->skipped_bytes_pos_size = s->skipped_bytes_pos_size_nal[s->nb_nals];
2976         s->skipped_bytes_pos = s->skipped_bytes_pos_nal[s->nb_nals];
2977         nal = &s->nals[s->nb_nals];
2978
2979         consumed = ff_hevc_extract_rbsp(s, buf, extract_length, nal);
2980
2981         s->skipped_bytes_nal[s->nb_nals] = s->skipped_bytes;
2982         s->skipped_bytes_pos_size_nal[s->nb_nals] = s->skipped_bytes_pos_size;
2983         s->skipped_bytes_pos_nal[s->nb_nals++] = s->skipped_bytes_pos;
2984
2985
2986         if (consumed < 0) {
2987             ret = consumed;
2988             goto fail;
2989         }
2990
2991         ret = init_get_bits8(&s->HEVClc->gb, nal->data, nal->size);
2992         if (ret < 0)
2993             goto fail;
2994         hls_nal_unit(s);
2995
2996         if (s->nal_unit_type == NAL_EOB_NUT ||
2997             s->nal_unit_type == NAL_EOS_NUT)
2998             s->eos = 1;
2999
3000         buf    += consumed;
3001         length -= consumed;
3002     }
3003
3004     /* parse the NAL units */
3005     for (i = 0; i < s->nb_nals; i++) {
3006         s->skipped_bytes = s->skipped_bytes_nal[i];
3007         s->skipped_bytes_pos = s->skipped_bytes_pos_nal[i];
3008
3009         ret = decode_nal_unit(s, &s->nals[i]);
3010         if (ret < 0) {
3011             av_log(s->avctx, AV_LOG_WARNING,
3012                    "Error parsing NAL unit #%d.\n", i);
3013             goto fail;
3014         }
3015     }
3016
3017 fail:
3018     if (s->ref && s->threads_type == FF_THREAD_FRAME)
3019         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
3020
3021     return ret;
3022 }
3023
3024 static void print_md5(void *log_ctx, int level, uint8_t md5[16])
3025 {
3026     int i;
3027     for (i = 0; i < 16; i++)
3028         av_log(log_ctx, level, "%02"PRIx8, md5[i]);
3029 }
3030
3031 static int verify_md5(HEVCContext *s, AVFrame *frame)
3032 {
3033     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
3034     int pixel_shift;
3035     int i, j;
3036
3037     if (!desc)
3038         return AVERROR(EINVAL);
3039
3040     pixel_shift = desc->comp[0].depth_minus1 > 7;
3041
3042     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
3043            s->poc);
3044
3045     /* the checksums are LE, so we have to byteswap for >8bpp formats
3046      * on BE arches */
3047 #if HAVE_BIGENDIAN
3048     if (pixel_shift && !s->checksum_buf) {
3049         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
3050                        FFMAX3(frame->linesize[0], frame->linesize[1],
3051                               frame->linesize[2]));
3052         if (!s->checksum_buf)
3053             return AVERROR(ENOMEM);
3054     }
3055 #endif
3056
3057     for (i = 0; frame->data[i]; i++) {
3058         int width  = s->avctx->coded_width;
3059         int height = s->avctx->coded_height;
3060         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
3061         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
3062         uint8_t md5[16];
3063
3064         av_md5_init(s->md5_ctx);
3065         for (j = 0; j < h; j++) {
3066             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
3067 #if HAVE_BIGENDIAN
3068             if (pixel_shift) {
3069                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
3070                                     (const uint16_t *) src, w);
3071                 src = s->checksum_buf;
3072             }
3073 #endif
3074             av_md5_update(s->md5_ctx, src, w << pixel_shift);
3075         }
3076         av_md5_final(s->md5_ctx, md5);
3077
3078         if (!memcmp(md5, s->md5[i], 16)) {
3079             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
3080             print_md5(s->avctx, AV_LOG_DEBUG, md5);
3081             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
3082         } else {
3083             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
3084             print_md5(s->avctx, AV_LOG_ERROR, md5);
3085             av_log   (s->avctx, AV_LOG_ERROR, " != ");
3086             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
3087             av_log   (s->avctx, AV_LOG_ERROR, "\n");
3088             return AVERROR_INVALIDDATA;
3089         }
3090     }
3091
3092     av_log(s->avctx, AV_LOG_DEBUG, "\n");
3093
3094     return 0;
3095 }
3096
3097 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
3098                              AVPacket *avpkt)
3099 {
3100     int ret;
3101     HEVCContext *s = avctx->priv_data;
3102
3103     if (!avpkt->size) {
3104         ret = ff_hevc_output_frame(s, data, 1);
3105         if (ret < 0)
3106             return ret;
3107
3108         *got_output = ret;
3109         return 0;
3110     }
3111
3112     s->ref = NULL;
3113     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
3114     if (ret < 0)
3115         return ret;
3116
3117     if (avctx->hwaccel) {
3118         if (s->ref && avctx->hwaccel->end_frame(avctx) < 0)
3119             av_log(avctx, AV_LOG_ERROR,
3120                    "hardware accelerator failed to decode picture\n");
3121     } else {
3122         /* verify the SEI checksum */
3123         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
3124             s->is_md5) {
3125             ret = verify_md5(s, s->ref->frame);
3126             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
3127                 ff_hevc_unref_frame(s, s->ref, ~0);
3128                 return ret;
3129             }
3130         }
3131     }
3132     s->is_md5 = 0;
3133
3134     if (s->is_decoded) {
3135         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
3136         s->is_decoded = 0;
3137     }
3138
3139     if (s->output_frame->buf[0]) {
3140         av_frame_move_ref(data, s->output_frame);
3141         *got_output = 1;
3142     }
3143
3144     return avpkt->size;
3145 }
3146
3147 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
3148 {
3149     int ret;
3150
3151     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
3152     if (ret < 0)
3153         return ret;
3154
3155     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
3156     if (!dst->tab_mvf_buf)
3157         goto fail;
3158     dst->tab_mvf = src->tab_mvf;
3159
3160     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
3161     if (!dst->rpl_tab_buf)
3162         goto fail;
3163     dst->rpl_tab = src->rpl_tab;
3164
3165     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
3166     if (!dst->rpl_buf)
3167         goto fail;
3168
3169     dst->poc        = src->poc;
3170     dst->ctb_count  = src->ctb_count;
3171     dst->window     = src->window;
3172     dst->flags      = src->flags;
3173     dst->sequence   = src->sequence;
3174
3175     if (src->hwaccel_picture_private) {
3176         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
3177         if (!dst->hwaccel_priv_buf)
3178             goto fail;
3179         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
3180     }
3181
3182     return 0;
3183 fail:
3184     ff_hevc_unref_frame(s, dst, ~0);
3185     return AVERROR(ENOMEM);
3186 }
3187
3188 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3189 {
3190     HEVCContext       *s = avctx->priv_data;
3191     int i;
3192
3193     pic_arrays_free(s);
3194
3195     av_freep(&s->md5_ctx);
3196
3197     for(i=0; i < s->nals_allocated; i++) {
3198         av_freep(&s->skipped_bytes_pos_nal[i]);
3199     }
3200     av_freep(&s->skipped_bytes_pos_size_nal);
3201     av_freep(&s->skipped_bytes_nal);
3202     av_freep(&s->skipped_bytes_pos_nal);
3203
3204     av_freep(&s->cabac_state);
3205
3206     for (i = 0; i < 3; i++) {
3207         av_freep(&s->sao_pixel_buffer_h[i]);
3208         av_freep(&s->sao_pixel_buffer_v[i]);
3209     }
3210     av_frame_free(&s->output_frame);
3211
3212     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3213         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3214         av_frame_free(&s->DPB[i].frame);
3215     }
3216
3217     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
3218         av_buffer_unref(&s->vps_list[i]);
3219     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
3220         av_buffer_unref(&s->sps_list[i]);
3221     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
3222         av_buffer_unref(&s->pps_list[i]);
3223     s->sps = NULL;
3224     s->pps = NULL;
3225     s->vps = NULL;
3226
3227     av_freep(&s->sh.entry_point_offset);
3228     av_freep(&s->sh.offset);
3229     av_freep(&s->sh.size);
3230
3231     for (i = 1; i < s->threads_number; i++) {
3232         HEVCLocalContext *lc = s->HEVClcList[i];
3233         if (lc) {
3234             av_freep(&s->HEVClcList[i]);
3235             av_freep(&s->sList[i]);
3236         }
3237     }
3238     if (s->HEVClc == s->HEVClcList[0])
3239         s->HEVClc = NULL;
3240     av_freep(&s->HEVClcList[0]);
3241
3242     for (i = 0; i < s->nals_allocated; i++)
3243         av_freep(&s->nals[i].rbsp_buffer);
3244     av_freep(&s->nals);
3245     s->nals_allocated = 0;
3246
3247     return 0;
3248 }
3249
3250 static av_cold int hevc_init_context(AVCodecContext *avctx)
3251 {
3252     HEVCContext *s = avctx->priv_data;
3253     int i;
3254
3255     s->avctx = avctx;
3256
3257     s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3258     if (!s->HEVClc)
3259         goto fail;
3260     s->HEVClcList[0] = s->HEVClc;
3261     s->sList[0] = s;
3262
3263     s->cabac_state = av_malloc(HEVC_CONTEXTS);
3264     if (!s->cabac_state)
3265         goto fail;
3266
3267     s->output_frame = av_frame_alloc();
3268     if (!s->output_frame)
3269         goto fail;
3270
3271     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3272         s->DPB[i].frame = av_frame_alloc();
3273         if (!s->DPB[i].frame)
3274             goto fail;
3275         s->DPB[i].tf.f = s->DPB[i].frame;
3276     }
3277
3278     s->max_ra = INT_MAX;
3279
3280     s->md5_ctx = av_md5_alloc();
3281     if (!s->md5_ctx)
3282         goto fail;
3283
3284     ff_bswapdsp_init(&s->bdsp);
3285
3286     s->context_initialized = 1;
3287     s->eos = 0;
3288
3289     return 0;
3290
3291 fail:
3292     hevc_decode_free(avctx);
3293     return AVERROR(ENOMEM);
3294 }
3295
3296 static int hevc_update_thread_context(AVCodecContext *dst,
3297                                       const AVCodecContext *src)
3298 {
3299     HEVCContext *s  = dst->priv_data;
3300     HEVCContext *s0 = src->priv_data;
3301     int i, ret;
3302
3303     if (!s->context_initialized) {
3304         ret = hevc_init_context(dst);
3305         if (ret < 0)
3306             return ret;
3307     }
3308
3309     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3310         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3311         if (s0->DPB[i].frame->buf[0]) {
3312             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3313             if (ret < 0)
3314                 return ret;
3315         }
3316     }
3317
3318     if (s->sps != s0->sps)
3319         s->sps = NULL;
3320     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3321         av_buffer_unref(&s->vps_list[i]);
3322         if (s0->vps_list[i]) {
3323             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3324             if (!s->vps_list[i])
3325                 return AVERROR(ENOMEM);
3326         }
3327     }
3328
3329     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3330         av_buffer_unref(&s->sps_list[i]);
3331         if (s0->sps_list[i]) {
3332             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3333             if (!s->sps_list[i])
3334                 return AVERROR(ENOMEM);
3335         }
3336     }
3337
3338     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3339         av_buffer_unref(&s->pps_list[i]);
3340         if (s0->pps_list[i]) {
3341             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3342             if (!s->pps_list[i])
3343                 return AVERROR(ENOMEM);
3344         }
3345     }
3346
3347     if (s->sps != s0->sps)
3348         if ((ret = set_sps(s, s0->sps, src->pix_fmt)) < 0)
3349             return ret;
3350
3351     s->seq_decode = s0->seq_decode;
3352     s->seq_output = s0->seq_output;
3353     s->pocTid0    = s0->pocTid0;
3354     s->max_ra     = s0->max_ra;
3355     s->eos        = s0->eos;
3356
3357     s->is_nalff        = s0->is_nalff;
3358     s->nal_length_size = s0->nal_length_size;
3359
3360     s->threads_number      = s0->threads_number;
3361     s->threads_type        = s0->threads_type;
3362
3363     if (s0->eos) {
3364         s->seq_decode = (s->seq_decode + 1) & 0xff;
3365         s->max_ra = INT_MAX;
3366     }
3367
3368     return 0;
3369 }
3370
3371 static int hevc_decode_extradata(HEVCContext *s)
3372 {
3373     AVCodecContext *avctx = s->avctx;
3374     GetByteContext gb;
3375     int ret, i;
3376
3377     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3378
3379     if (avctx->extradata_size > 3 &&
3380         (avctx->extradata[0] || avctx->extradata[1] ||
3381          avctx->extradata[2] > 1)) {
3382         /* It seems the extradata is encoded as hvcC format.
3383          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3384          * is finalized. When finalized, configurationVersion will be 1 and we
3385          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3386         int i, j, num_arrays, nal_len_size;
3387
3388         s->is_nalff = 1;
3389
3390         bytestream2_skip(&gb, 21);
3391         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3392         num_arrays   = bytestream2_get_byte(&gb);
3393
3394         /* nal units in the hvcC always have length coded with 2 bytes,
3395          * so put a fake nal_length_size = 2 while parsing them */
3396         s->nal_length_size = 2;
3397
3398         /* Decode nal units from hvcC. */
3399         for (i = 0; i < num_arrays; i++) {
3400             int type = bytestream2_get_byte(&gb) & 0x3f;
3401             int cnt  = bytestream2_get_be16(&gb);
3402
3403             for (j = 0; j < cnt; j++) {
3404                 // +2 for the nal size field
3405                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3406                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3407                     av_log(s->avctx, AV_LOG_ERROR,
3408                            "Invalid NAL unit size in extradata.\n");
3409                     return AVERROR_INVALIDDATA;
3410                 }
3411
3412                 ret = decode_nal_units(s, gb.buffer, nalsize);
3413                 if (ret < 0) {
3414                     av_log(avctx, AV_LOG_ERROR,
3415                            "Decoding nal unit %d %d from hvcC failed\n",
3416                            type, i);
3417                     return ret;
3418                 }
3419                 bytestream2_skip(&gb, nalsize);
3420             }
3421         }
3422
3423         /* Now store right nal length size, that will be used to parse
3424          * all other nals */
3425         s->nal_length_size = nal_len_size;
3426     } else {
3427         s->is_nalff = 0;
3428         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3429         if (ret < 0)
3430             return ret;
3431     }
3432
3433     /* export stream parameters from the first SPS */
3434     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3435         if (s->sps_list[i]) {
3436             const HEVCSPS *sps = (const HEVCSPS*)s->sps_list[i]->data;
3437             export_stream_params(s->avctx, s, sps);
3438             break;
3439         }
3440     }
3441
3442     return 0;
3443 }
3444
3445 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3446 {
3447     HEVCContext *s = avctx->priv_data;
3448     int ret;
3449
3450     ff_init_cabac_states();
3451
3452     avctx->internal->allocate_progress = 1;
3453
3454     ret = hevc_init_context(avctx);
3455     if (ret < 0)
3456         return ret;
3457
3458     s->enable_parallel_tiles = 0;
3459     s->picture_struct = 0;
3460
3461     if(avctx->active_thread_type & FF_THREAD_SLICE)
3462         s->threads_number = avctx->thread_count;
3463     else
3464         s->threads_number = 1;
3465
3466     if (avctx->extradata_size > 0 && avctx->extradata) {
3467         ret = hevc_decode_extradata(s);
3468         if (ret < 0) {
3469             hevc_decode_free(avctx);
3470             return ret;
3471         }
3472     }
3473
3474     if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3475             s->threads_type = FF_THREAD_FRAME;
3476         else
3477             s->threads_type = FF_THREAD_SLICE;
3478
3479     return 0;
3480 }
3481
3482 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3483 {
3484     HEVCContext *s = avctx->priv_data;
3485     int ret;
3486
3487     memset(s, 0, sizeof(*s));
3488
3489     ret = hevc_init_context(avctx);
3490     if (ret < 0)
3491         return ret;
3492
3493     return 0;
3494 }
3495
3496 static void hevc_decode_flush(AVCodecContext *avctx)
3497 {
3498     HEVCContext *s = avctx->priv_data;
3499     ff_hevc_flush_dpb(s);
3500     s->max_ra = INT_MAX;
3501 }
3502
3503 #define OFFSET(x) offsetof(HEVCContext, x)
3504 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3505
3506 static const AVProfile profiles[] = {
3507     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
3508     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
3509     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
3510     { FF_PROFILE_HEVC_REXT,                 "Rext"  },
3511     { FF_PROFILE_UNKNOWN },
3512 };
3513
3514 static const AVOption options[] = {
3515     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3516         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3517     { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
3518         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3519     { NULL },
3520 };
3521
3522 static const AVClass hevc_decoder_class = {
3523     .class_name = "HEVC decoder",
3524     .item_name  = av_default_item_name,
3525     .option     = options,
3526     .version    = LIBAVUTIL_VERSION_INT,
3527 };
3528
3529 AVCodec ff_hevc_decoder = {
3530     .name                  = "hevc",
3531     .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3532     .type                  = AVMEDIA_TYPE_VIDEO,
3533     .id                    = AV_CODEC_ID_HEVC,
3534     .priv_data_size        = sizeof(HEVCContext),
3535     .priv_class            = &hevc_decoder_class,
3536     .init                  = hevc_decode_init,
3537     .close                 = hevc_decode_free,
3538     .decode                = hevc_decode_frame,
3539     .flush                 = hevc_decode_flush,
3540     .update_thread_context = hevc_update_thread_context,
3541     .init_thread_copy      = hevc_init_thread_copy,
3542     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3543                              CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
3544     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
3545 };