 * Copyright (C) 2012 - 2013 Guillaume Martres
 * Copyright (C) 2012 - 2013 Mickael Raulet
 * Copyright (C) 2012 - 2013 Gildas Cocherel
 * Copyright (C) 2012 - 2013 Wassim Hamidouche
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "libavutil/atomic.h"
27 #include "libavutil/attributes.h"
28 #include "libavutil/common.h"
29 #include "libavutil/display.h"
30 #include "libavutil/internal.h"
31 #include "libavutil/md5.h"
32 #include "libavutil/opt.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/stereo3d.h"
37 #include "bytestream.h"
38 #include "cabac_functions.h"
42 const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
45 * NOTE: Each function hls_foo correspond to the function foo in the
46 * specification (HLS stands for High Level Syntax).
53 /* free everything allocated by pic_arrays_init() */
54 static void pic_arrays_free(HEVCContext *s)
57 av_freep(&s->deblock);
59 av_freep(&s->skip_flag);
60 av_freep(&s->tab_ct_depth);
62 av_freep(&s->tab_ipm);
63 av_freep(&s->cbf_luma);
66 av_freep(&s->qp_y_tab);
67 av_freep(&s->tab_slice_address);
68 av_freep(&s->filter_slice_edges);
70 av_freep(&s->horizontal_bs);
71 av_freep(&s->vertical_bs);
73 av_freep(&s->sh.entry_point_offset);
74 av_freep(&s->sh.size);
75 av_freep(&s->sh.offset);
77 av_buffer_pool_uninit(&s->tab_mvf_pool);
78 av_buffer_pool_uninit(&s->rpl_tab_pool);
81 /* allocate arrays that depend on frame dimensions */
82 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
84 int log2_min_cb_size = sps->log2_min_cb_size;
85 int width = sps->width;
86 int height = sps->height;
87 int pic_size_in_ctb = ((width >> log2_min_cb_size) + 1) *
88 ((height >> log2_min_cb_size) + 1);
89 int ctb_count = sps->ctb_width * sps->ctb_height;
90 int min_pu_size = sps->min_pu_width * sps->min_pu_height;
92 s->bs_width = (width >> 2) + 1;
93 s->bs_height = (height >> 2) + 1;
95 s->sao = av_mallocz_array(ctb_count, sizeof(*s->sao));
96 s->deblock = av_mallocz_array(ctb_count, sizeof(*s->deblock));
97 if (!s->sao || !s->deblock)
100 s->skip_flag = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
101 s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
102 if (!s->skip_flag || !s->tab_ct_depth)
105 s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
106 s->tab_ipm = av_mallocz(min_pu_size);
107 s->is_pcm = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
108 if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
111 s->filter_slice_edges = av_mallocz(ctb_count);
112 s->tab_slice_address = av_malloc_array(pic_size_in_ctb,
113 sizeof(*s->tab_slice_address));
114 s->qp_y_tab = av_malloc_array(pic_size_in_ctb,
115 sizeof(*s->qp_y_tab));
116 if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
119 s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
120 s->vertical_bs = av_mallocz_array(s->bs_width, s->bs_height);
121 if (!s->horizontal_bs || !s->vertical_bs)
124 s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
126 s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
128 if (!s->tab_mvf_pool || !s->rpl_tab_pool)
135 return AVERROR(ENOMEM);
138 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
142 uint8_t luma_weight_l0_flag[16];
143 uint8_t chroma_weight_l0_flag[16];
144 uint8_t luma_weight_l1_flag[16];
145 uint8_t chroma_weight_l1_flag[16];
146 int luma_log2_weight_denom;
148 luma_log2_weight_denom = get_ue_golomb_long(gb);
149 if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7)
150 av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
151 s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
152 if (s->sps->chroma_format_idc != 0) {
153 int delta = get_se_golomb(gb);
154 s->sh.chroma_log2_weight_denom = av_clip_uintp2(s->sh.luma_log2_weight_denom + delta, 3);
157 for (i = 0; i < s->sh.nb_refs[L0]; i++) {
158 luma_weight_l0_flag[i] = get_bits1(gb);
159 if (!luma_weight_l0_flag[i]) {
160 s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
161 s->sh.luma_offset_l0[i] = 0;
164 if (s->sps->chroma_format_idc != 0) {
165 for (i = 0; i < s->sh.nb_refs[L0]; i++)
166 chroma_weight_l0_flag[i] = get_bits1(gb);
168 for (i = 0; i < s->sh.nb_refs[L0]; i++)
169 chroma_weight_l0_flag[i] = 0;
171 for (i = 0; i < s->sh.nb_refs[L0]; i++) {
172 if (luma_weight_l0_flag[i]) {
173 int delta_luma_weight_l0 = get_se_golomb(gb);
174 s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
175 s->sh.luma_offset_l0[i] = get_se_golomb(gb);
177 if (chroma_weight_l0_flag[i]) {
178 for (j = 0; j < 2; j++) {
179 int delta_chroma_weight_l0 = get_se_golomb(gb);
180 int delta_chroma_offset_l0 = get_se_golomb(gb);
181 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
182 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
183 >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
186 s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
187 s->sh.chroma_offset_l0[i][0] = 0;
188 s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
189 s->sh.chroma_offset_l0[i][1] = 0;
192 if (s->sh.slice_type == B_SLICE) {
193 for (i = 0; i < s->sh.nb_refs[L1]; i++) {
194 luma_weight_l1_flag[i] = get_bits1(gb);
195 if (!luma_weight_l1_flag[i]) {
196 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
197 s->sh.luma_offset_l1[i] = 0;
200 if (s->sps->chroma_format_idc != 0) {
201 for (i = 0; i < s->sh.nb_refs[L1]; i++)
202 chroma_weight_l1_flag[i] = get_bits1(gb);
204 for (i = 0; i < s->sh.nb_refs[L1]; i++)
205 chroma_weight_l1_flag[i] = 0;
207 for (i = 0; i < s->sh.nb_refs[L1]; i++) {
208 if (luma_weight_l1_flag[i]) {
209 int delta_luma_weight_l1 = get_se_golomb(gb);
210 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
211 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
213 if (chroma_weight_l1_flag[i]) {
214 for (j = 0; j < 2; j++) {
215 int delta_chroma_weight_l1 = get_se_golomb(gb);
216 int delta_chroma_offset_l1 = get_se_golomb(gb);
217 s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
218 s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
219 >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
222 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
223 s->sh.chroma_offset_l1[i][0] = 0;
224 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
225 s->sh.chroma_offset_l1[i][1] = 0;
231 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
233 const HEVCSPS *sps = s->sps;
234 int max_poc_lsb = 1 << sps->log2_max_poc_lsb;
235 int prev_delta_msb = 0;
236 unsigned int nb_sps = 0, nb_sh;
240 if (!sps->long_term_ref_pics_present_flag)
243 if (sps->num_long_term_ref_pics_sps > 0)
244 nb_sps = get_ue_golomb_long(gb);
245 nb_sh = get_ue_golomb_long(gb);
247 if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
248 return AVERROR_INVALIDDATA;
250 rps->nb_refs = nb_sh + nb_sps;
252 for (i = 0; i < rps->nb_refs; i++) {
253 uint8_t delta_poc_msb_present;
256 uint8_t lt_idx_sps = 0;
258 if (sps->num_long_term_ref_pics_sps > 1)
259 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
261 rps->poc[i] = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
262 rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
264 rps->poc[i] = get_bits(gb, sps->log2_max_poc_lsb);
265 rps->used[i] = get_bits1(gb);
268 delta_poc_msb_present = get_bits1(gb);
269 if (delta_poc_msb_present) {
270 int delta = get_ue_golomb_long(gb);
272 if (i && i != nb_sps)
273 delta += prev_delta_msb;
275 rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
276 prev_delta_msb = delta;
283 static void export_stream_params(AVCodecContext *avctx,
284 const HEVCContext *s, const HEVCSPS *sps)
286 const HEVCVPS *vps = (const HEVCVPS*)s->vps_list[sps->vps_id]->data;
287 unsigned int num = 0, den = 0;
289 avctx->pix_fmt = sps->pix_fmt;
290 avctx->coded_width = sps->width;
291 avctx->coded_height = sps->height;
292 avctx->width = sps->output_width;
293 avctx->height = sps->output_height;
294 avctx->has_b_frames = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
295 avctx->profile = sps->ptl.general_ptl.profile_idc;
296 avctx->level = sps->ptl.general_ptl.level_idc;
298 ff_set_sar(avctx, sps->vui.sar);
300 if (sps->vui.video_signal_type_present_flag)
301 avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
304 avctx->color_range = AVCOL_RANGE_MPEG;
306 if (sps->vui.colour_description_present_flag) {
307 avctx->color_primaries = sps->vui.colour_primaries;
308 avctx->color_trc = sps->vui.transfer_characteristic;
309 avctx->colorspace = sps->vui.matrix_coeffs;
311 avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
312 avctx->color_trc = AVCOL_TRC_UNSPECIFIED;
313 avctx->colorspace = AVCOL_SPC_UNSPECIFIED;
316 if (vps->vps_timing_info_present_flag) {
317 num = vps->vps_num_units_in_tick;
318 den = vps->vps_time_scale;
319 } else if (sps->vui.vui_timing_info_present_flag) {
320 num = sps->vui.vui_num_units_in_tick;
321 den = sps->vui.vui_time_scale;
324 if (num != 0 && den != 0)
325 av_reduce(&avctx->framerate.den, &avctx->framerate.num,
329 static int set_sps(HEVCContext *s, const HEVCSPS *sps, enum AVPixelFormat pix_fmt)
331 #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL)
332 enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
335 export_stream_params(s->avctx, s, sps);
338 ret = pic_arrays_init(s, sps);
342 if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
343 #if CONFIG_HEVC_DXVA2_HWACCEL
344 *fmt++ = AV_PIX_FMT_DXVA2_VLD;
346 #if CONFIG_HEVC_D3D11VA_HWACCEL
347 *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
351 if (pix_fmt == AV_PIX_FMT_NONE) {
352 *fmt++ = sps->pix_fmt;
353 *fmt = AV_PIX_FMT_NONE;
355 ret = ff_thread_get_format(s->avctx, pix_fmts);
358 s->avctx->pix_fmt = ret;
361 s->avctx->pix_fmt = pix_fmt;
364 ff_hevc_pred_init(&s->hpc, sps->bit_depth);
365 ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
366 ff_videodsp_init (&s->vdsp, sps->bit_depth);
368 for (i = 0; i < 3; i++) {
369 av_freep(&s->sao_pixel_buffer_h[i]);
370 av_freep(&s->sao_pixel_buffer_v[i]);
373 if (sps->sao_enabled && !s->avctx->hwaccel) {
374 int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
377 for(c_idx = 0; c_idx < c_count; c_idx++) {
378 int w = sps->width >> sps->hshift[c_idx];
379 int h = sps->height >> sps->vshift[c_idx];
380 s->sao_pixel_buffer_h[c_idx] =
381 av_malloc((w * 2 * sps->ctb_height) <<
383 s->sao_pixel_buffer_v[c_idx] =
384 av_malloc((h * 2 * sps->ctb_width) <<
390 s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
400 static int hls_slice_header(HEVCContext *s)
402 GetBitContext *gb = &s->HEVClc->gb;
403 SliceHeader *sh = &s->sh;
407 sh->first_slice_in_pic_flag = get_bits1(gb);
408 if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
409 s->seq_decode = (s->seq_decode + 1) & 0xff;
412 ff_hevc_clear_refs(s);
414 sh->no_output_of_prior_pics_flag = 0;
416 sh->no_output_of_prior_pics_flag = get_bits1(gb);
418 sh->pps_id = get_ue_golomb_long(gb);
419 if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
420 av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
421 return AVERROR_INVALIDDATA;
423 if (!sh->first_slice_in_pic_flag &&
424 s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
425 av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
426 return AVERROR_INVALIDDATA;
428 s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
429 if (s->nal_unit_type == NAL_CRA_NUT && s->last_eos == 1)
430 sh->no_output_of_prior_pics_flag = 1;
432 if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
433 const HEVCSPS* last_sps = s->sps;
434 s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
435 if (last_sps && IS_IRAP(s) && s->nal_unit_type != NAL_CRA_NUT) {
436 if (s->sps->width != last_sps->width || s->sps->height != last_sps->height ||
437 s->sps->temporal_layer[s->sps->max_sub_layers - 1].max_dec_pic_buffering !=
438 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
439 sh->no_output_of_prior_pics_flag = 0;
441 ff_hevc_clear_refs(s);
442 ret = set_sps(s, s->sps, AV_PIX_FMT_NONE);
446 s->seq_decode = (s->seq_decode + 1) & 0xff;
450 sh->dependent_slice_segment_flag = 0;
451 if (!sh->first_slice_in_pic_flag) {
452 int slice_address_length;
454 if (s->pps->dependent_slice_segments_enabled_flag)
455 sh->dependent_slice_segment_flag = get_bits1(gb);
457 slice_address_length = av_ceil_log2(s->sps->ctb_width *
459 sh->slice_segment_addr = get_bits(gb, slice_address_length);
460 if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
461 av_log(s->avctx, AV_LOG_ERROR,
462 "Invalid slice segment address: %u.\n",
463 sh->slice_segment_addr);
464 return AVERROR_INVALIDDATA;
467 if (!sh->dependent_slice_segment_flag) {
468 sh->slice_addr = sh->slice_segment_addr;
472 sh->slice_segment_addr = sh->slice_addr = 0;
474 s->slice_initialized = 0;
477 if (!sh->dependent_slice_segment_flag) {
478 s->slice_initialized = 0;
480 for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
481 skip_bits(gb, 1); // slice_reserved_undetermined_flag[]
483 sh->slice_type = get_ue_golomb_long(gb);
484 if (!(sh->slice_type == I_SLICE ||
485 sh->slice_type == P_SLICE ||
486 sh->slice_type == B_SLICE)) {
487 av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
489 return AVERROR_INVALIDDATA;
491 if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
492 av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
493 return AVERROR_INVALIDDATA;
496 // when flag is not present, picture is inferred to be output
497 sh->pic_output_flag = 1;
498 if (s->pps->output_flag_present_flag)
499 sh->pic_output_flag = get_bits1(gb);
501 if (s->sps->separate_colour_plane_flag)
502 sh->colour_plane_id = get_bits(gb, 2);
507 sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
508 poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
509 if (!sh->first_slice_in_pic_flag && poc != s->poc) {
510 av_log(s->avctx, AV_LOG_WARNING,
511 "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
512 if (s->avctx->err_recognition & AV_EF_EXPLODE)
513 return AVERROR_INVALIDDATA;
518 sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
519 pos = get_bits_left(gb);
520 if (!sh->short_term_ref_pic_set_sps_flag) {
521 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
525 sh->short_term_rps = &sh->slice_rps;
527 int numbits, rps_idx;
529 if (!s->sps->nb_st_rps) {
530 av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
531 return AVERROR_INVALIDDATA;
534 numbits = av_ceil_log2(s->sps->nb_st_rps);
535 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
536 sh->short_term_rps = &s->sps->st_rps[rps_idx];
538 sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
540 pos = get_bits_left(gb);
541 ret = decode_lt_rps(s, &sh->long_term_rps, gb);
543 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
544 if (s->avctx->err_recognition & AV_EF_EXPLODE)
545 return AVERROR_INVALIDDATA;
547 sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
549 if (s->sps->sps_temporal_mvp_enabled_flag)
550 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
552 sh->slice_temporal_mvp_enabled_flag = 0;
554 s->sh.short_term_rps = NULL;
559 if (s->temporal_id == 0 &&
560 s->nal_unit_type != NAL_TRAIL_N &&
561 s->nal_unit_type != NAL_TSA_N &&
562 s->nal_unit_type != NAL_STSA_N &&
563 s->nal_unit_type != NAL_RADL_N &&
564 s->nal_unit_type != NAL_RADL_R &&
565 s->nal_unit_type != NAL_RASL_N &&
566 s->nal_unit_type != NAL_RASL_R)
569 if (s->sps->sao_enabled) {
570 sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
571 if (s->sps->chroma_format_idc) {
572 sh->slice_sample_adaptive_offset_flag[1] =
573 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
576 sh->slice_sample_adaptive_offset_flag[0] = 0;
577 sh->slice_sample_adaptive_offset_flag[1] = 0;
578 sh->slice_sample_adaptive_offset_flag[2] = 0;
581 sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
582 if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
585 sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
586 if (sh->slice_type == B_SLICE)
587 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
589 if (get_bits1(gb)) { // num_ref_idx_active_override_flag
590 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
591 if (sh->slice_type == B_SLICE)
592 sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
594 if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
595 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
596 sh->nb_refs[L0], sh->nb_refs[L1]);
597 return AVERROR_INVALIDDATA;
600 sh->rpl_modification_flag[0] = 0;
601 sh->rpl_modification_flag[1] = 0;
602 nb_refs = ff_hevc_frame_nb_refs(s);
604 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
605 return AVERROR_INVALIDDATA;
608 if (s->pps->lists_modification_present_flag && nb_refs > 1) {
609 sh->rpl_modification_flag[0] = get_bits1(gb);
610 if (sh->rpl_modification_flag[0]) {
611 for (i = 0; i < sh->nb_refs[L0]; i++)
612 sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
615 if (sh->slice_type == B_SLICE) {
616 sh->rpl_modification_flag[1] = get_bits1(gb);
617 if (sh->rpl_modification_flag[1] == 1)
618 for (i = 0; i < sh->nb_refs[L1]; i++)
619 sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
623 if (sh->slice_type == B_SLICE)
624 sh->mvd_l1_zero_flag = get_bits1(gb);
626 if (s->pps->cabac_init_present_flag)
627 sh->cabac_init_flag = get_bits1(gb);
629 sh->cabac_init_flag = 0;
631 sh->collocated_ref_idx = 0;
632 if (sh->slice_temporal_mvp_enabled_flag) {
633 sh->collocated_list = L0;
634 if (sh->slice_type == B_SLICE)
635 sh->collocated_list = !get_bits1(gb);
637 if (sh->nb_refs[sh->collocated_list] > 1) {
638 sh->collocated_ref_idx = get_ue_golomb_long(gb);
639 if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
640 av_log(s->avctx, AV_LOG_ERROR,
641 "Invalid collocated_ref_idx: %d.\n",
642 sh->collocated_ref_idx);
643 return AVERROR_INVALIDDATA;
648 if ((s->pps->weighted_pred_flag && sh->slice_type == P_SLICE) ||
649 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
650 pred_weight_table(s, gb);
653 sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
654 if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
655 av_log(s->avctx, AV_LOG_ERROR,
656 "Invalid number of merging MVP candidates: %d.\n",
657 sh->max_num_merge_cand);
658 return AVERROR_INVALIDDATA;
662 sh->slice_qp_delta = get_se_golomb(gb);
664 if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
665 sh->slice_cb_qp_offset = get_se_golomb(gb);
666 sh->slice_cr_qp_offset = get_se_golomb(gb);
668 sh->slice_cb_qp_offset = 0;
669 sh->slice_cr_qp_offset = 0;
672 if (s->pps->chroma_qp_offset_list_enabled_flag)
673 sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
675 sh->cu_chroma_qp_offset_enabled_flag = 0;
677 if (s->pps->deblocking_filter_control_present_flag) {
678 int deblocking_filter_override_flag = 0;
680 if (s->pps->deblocking_filter_override_enabled_flag)
681 deblocking_filter_override_flag = get_bits1(gb);
683 if (deblocking_filter_override_flag) {
684 sh->disable_deblocking_filter_flag = get_bits1(gb);
685 if (!sh->disable_deblocking_filter_flag) {
686 sh->beta_offset = get_se_golomb(gb) * 2;
687 sh->tc_offset = get_se_golomb(gb) * 2;
690 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
691 sh->beta_offset = s->pps->beta_offset;
692 sh->tc_offset = s->pps->tc_offset;
695 sh->disable_deblocking_filter_flag = 0;
700 if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
701 (sh->slice_sample_adaptive_offset_flag[0] ||
702 sh->slice_sample_adaptive_offset_flag[1] ||
703 !sh->disable_deblocking_filter_flag)) {
704 sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
706 sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
708 } else if (!s->slice_initialized) {
709 av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
710 return AVERROR_INVALIDDATA;
713 sh->num_entry_point_offsets = 0;
714 if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
715 unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
716 // It would be possible to bound this tighter but this here is simpler
717 if (num_entry_point_offsets > get_bits_left(gb)) {
718 av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
719 return AVERROR_INVALIDDATA;
722 sh->num_entry_point_offsets = num_entry_point_offsets;
723 if (sh->num_entry_point_offsets > 0) {
724 int offset_len = get_ue_golomb_long(gb) + 1;
726 if (offset_len < 1 || offset_len > 32) {
727 sh->num_entry_point_offsets = 0;
728 av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
729 return AVERROR_INVALIDDATA;
732 av_freep(&sh->entry_point_offset);
733 av_freep(&sh->offset);
735 sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
736 sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
737 sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
738 if (!sh->entry_point_offset || !sh->offset || !sh->size) {
739 sh->num_entry_point_offsets = 0;
740 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
741 return AVERROR(ENOMEM);
743 for (i = 0; i < sh->num_entry_point_offsets; i++) {
744 unsigned val = get_bits_long(gb, offset_len);
745 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
747 if (s->threads_number > 1 && (s->pps->num_tile_rows > 1 || s->pps->num_tile_columns > 1)) {
748 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
749 s->threads_number = 1;
751 s->enable_parallel_tiles = 0;
753 s->enable_parallel_tiles = 0;
756 if (s->pps->slice_header_extension_present_flag) {
757 unsigned int length = get_ue_golomb_long(gb);
758 if (length*8LL > get_bits_left(gb)) {
759 av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
760 return AVERROR_INVALIDDATA;
762 for (i = 0; i < length; i++)
763 skip_bits(gb, 8); // slice_header_extension_data_byte
766 // Inferred parameters
767 sh->slice_qp = 26U + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
768 if (sh->slice_qp > 51 ||
769 sh->slice_qp < -s->sps->qp_bd_offset) {
770 av_log(s->avctx, AV_LOG_ERROR,
771 "The slice_qp %d is outside the valid range "
774 -s->sps->qp_bd_offset);
775 return AVERROR_INVALIDDATA;
778 sh->slice_ctb_addr_rs = sh->slice_segment_addr;
780 if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
781 av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
782 return AVERROR_INVALIDDATA;
785 if (get_bits_left(gb) < 0) {
786 av_log(s->avctx, AV_LOG_ERROR,
787 "Overread slice header by %d bits\n", -get_bits_left(gb));
788 return AVERROR_INVALIDDATA;
791 s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
793 if (!s->pps->cu_qp_delta_enabled_flag)
794 s->HEVClc->qp_y = s->sh.slice_qp;
796 s->slice_initialized = 1;
797 s->HEVClc->tu.cu_qp_offset_cb = 0;
798 s->HEVClc->tu.cu_qp_offset_cr = 0;
803 #define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
805 #define SET_SAO(elem, value) \
807 if (!sao_merge_up_flag && !sao_merge_left_flag) \
809 else if (sao_merge_left_flag) \
810 sao->elem = CTB(s->sao, rx-1, ry).elem; \
811 else if (sao_merge_up_flag) \
812 sao->elem = CTB(s->sao, rx, ry-1).elem; \
817 static void hls_sao_param(HEVCContext *s, int rx, int ry)
819 HEVCLocalContext *lc = s->HEVClc;
820 int sao_merge_left_flag = 0;
821 int sao_merge_up_flag = 0;
822 SAOParams *sao = &CTB(s->sao, rx, ry);
825 if (s->sh.slice_sample_adaptive_offset_flag[0] ||
826 s->sh.slice_sample_adaptive_offset_flag[1]) {
828 if (lc->ctb_left_flag)
829 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
831 if (ry > 0 && !sao_merge_left_flag) {
833 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
837 for (c_idx = 0; c_idx < (s->sps->chroma_format_idc ? 3 : 1); c_idx++) {
838 int log2_sao_offset_scale = c_idx == 0 ? s->pps->log2_sao_offset_scale_luma :
839 s->pps->log2_sao_offset_scale_chroma;
841 if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
842 sao->type_idx[c_idx] = SAO_NOT_APPLIED;
847 sao->type_idx[2] = sao->type_idx[1];
848 sao->eo_class[2] = sao->eo_class[1];
850 SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
853 if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
856 for (i = 0; i < 4; i++)
857 SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
859 if (sao->type_idx[c_idx] == SAO_BAND) {
860 for (i = 0; i < 4; i++) {
861 if (sao->offset_abs[c_idx][i]) {
862 SET_SAO(offset_sign[c_idx][i],
863 ff_hevc_sao_offset_sign_decode(s));
865 sao->offset_sign[c_idx][i] = 0;
868 SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
869 } else if (c_idx != 2) {
870 SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
873 // Inferred parameters
874 sao->offset_val[c_idx][0] = 0;
875 for (i = 0; i < 4; i++) {
876 sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
877 if (sao->type_idx[c_idx] == SAO_EDGE) {
879 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
880 } else if (sao->offset_sign[c_idx][i]) {
881 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
883 sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale;
891 static int hls_cross_component_pred(HEVCContext *s, int idx) {
892 HEVCLocalContext *lc = s->HEVClc;
893 int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
895 if (log2_res_scale_abs_plus1 != 0) {
896 int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
897 lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
898 (1 - 2 * res_scale_sign_flag);
900 lc->tu.res_scale_val = 0;
907 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
908 int xBase, int yBase, int cb_xBase, int cb_yBase,
909 int log2_cb_size, int log2_trafo_size,
910 int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
912 HEVCLocalContext *lc = s->HEVClc;
913 const int log2_trafo_size_c = log2_trafo_size - s->sps->hshift[1];
916 if (lc->cu.pred_mode == MODE_INTRA) {
917 int trafo_size = 1 << log2_trafo_size;
918 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
920 s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
923 if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
924 (s->sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
925 int scan_idx = SCAN_DIAG;
926 int scan_idx_c = SCAN_DIAG;
927 int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
928 (s->sps->chroma_format_idc == 2 &&
929 (cbf_cb[1] || cbf_cr[1]));
931 if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
932 lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
933 if (lc->tu.cu_qp_delta != 0)
934 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
935 lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
936 lc->tu.is_cu_qp_delta_coded = 1;
938 if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
939 lc->tu.cu_qp_delta > (25 + s->sps->qp_bd_offset / 2)) {
940 av_log(s->avctx, AV_LOG_ERROR,
941 "The cu_qp_delta %d is outside the valid range "
944 -(26 + s->sps->qp_bd_offset / 2),
945 (25 + s->sps->qp_bd_offset / 2));
946 return AVERROR_INVALIDDATA;
949 ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
952 if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
953 !lc->cu.cu_transquant_bypass_flag && !lc->tu.is_cu_chroma_qp_offset_coded) {
954 int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
955 if (cu_chroma_qp_offset_flag) {
956 int cu_chroma_qp_offset_idx = 0;
957 if (s->pps->chroma_qp_offset_list_len_minus1 > 0) {
958 cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
959 av_log(s->avctx, AV_LOG_ERROR,
960 "cu_chroma_qp_offset_idx not yet tested.\n");
962 lc->tu.cu_qp_offset_cb = s->pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
963 lc->tu.cu_qp_offset_cr = s->pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
965 lc->tu.cu_qp_offset_cb = 0;
966 lc->tu.cu_qp_offset_cr = 0;
968 lc->tu.is_cu_chroma_qp_offset_coded = 1;
971 if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
972 if (lc->tu.intra_pred_mode >= 6 &&
973 lc->tu.intra_pred_mode <= 14) {
974 scan_idx = SCAN_VERT;
975 } else if (lc->tu.intra_pred_mode >= 22 &&
976 lc->tu.intra_pred_mode <= 30) {
977 scan_idx = SCAN_HORIZ;
980 if (lc->tu.intra_pred_mode_c >= 6 &&
981 lc->tu.intra_pred_mode_c <= 14) {
982 scan_idx_c = SCAN_VERT;
983 } else if (lc->tu.intra_pred_mode_c >= 22 &&
984 lc->tu.intra_pred_mode_c <= 30) {
985 scan_idx_c = SCAN_HORIZ;
992 ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
993 if (s->sps->chroma_format_idc && (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3)) {
994 int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
995 int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
996 lc->tu.cross_pf = (s->pps->cross_component_prediction_enabled_flag && cbf_luma &&
997 (lc->cu.pred_mode == MODE_INTER ||
998 (lc->tu.chroma_mode_c == 4)));
1000 if (lc->tu.cross_pf) {
1001 hls_cross_component_pred(s, 0);
1003 for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1004 if (lc->cu.pred_mode == MODE_INTRA) {
1005 ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1006 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
1009 ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1010 log2_trafo_size_c, scan_idx_c, 1);
1012 if (lc->tu.cross_pf) {
1013 ptrdiff_t stride = s->frame->linesize[1];
1014 int hshift = s->sps->hshift[1];
1015 int vshift = s->sps->vshift[1];
1016 int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1017 int16_t *coeffs = (int16_t*)lc->edge_emu_buffer2;
1018 int size = 1 << log2_trafo_size_c;
1020 uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1021 ((x0 >> hshift) << s->sps->pixel_shift)];
1022 for (i = 0; i < (size * size); i++) {
1023 coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1025 s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1029 if (lc->tu.cross_pf) {
1030 hls_cross_component_pred(s, 1);
1032 for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1033 if (lc->cu.pred_mode == MODE_INTRA) {
1034 ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1035 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1038 ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1039 log2_trafo_size_c, scan_idx_c, 2);
1041 if (lc->tu.cross_pf) {
1042 ptrdiff_t stride = s->frame->linesize[2];
1043 int hshift = s->sps->hshift[2];
1044 int vshift = s->sps->vshift[2];
1045 int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1046 int16_t *coeffs = (int16_t*)lc->edge_emu_buffer2;
1047 int size = 1 << log2_trafo_size_c;
1049 uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1050 ((x0 >> hshift) << s->sps->pixel_shift)];
1051 for (i = 0; i < (size * size); i++) {
1052 coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
1054 s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1057 } else if (s->sps->chroma_format_idc && blk_idx == 3) {
1058 int trafo_size_h = 1 << (log2_trafo_size + 1);
1059 int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
1060 for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1061 if (lc->cu.pred_mode == MODE_INTRA) {
1062 ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1063 trafo_size_h, trafo_size_v);
1064 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1067 ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1068 log2_trafo_size, scan_idx_c, 1);
1070 for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1071 if (lc->cu.pred_mode == MODE_INTRA) {
1072 ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1073 trafo_size_h, trafo_size_v);
1074 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1077 ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1078 log2_trafo_size, scan_idx_c, 2);
1081 } else if (s->sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
1082 if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
1083 int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
1084 int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
1085 ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1086 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1087 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1088 if (s->sps->chroma_format_idc == 2) {
1089 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1090 trafo_size_h, trafo_size_v);
1091 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1092 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1094 } else if (blk_idx == 3) {
1095 int trafo_size_h = 1 << (log2_trafo_size + 1);
1096 int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
1097 ff_hevc_set_neighbour_available(s, xBase, yBase,
1098 trafo_size_h, trafo_size_v);
1099 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1100 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1101 if (s->sps->chroma_format_idc == 2) {
1102 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1103 trafo_size_h, trafo_size_v);
1104 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1105 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
1113 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1115 int cb_size = 1 << log2_cb_size;
1116 int log2_min_pu_size = s->sps->log2_min_pu_size;
1118 int min_pu_width = s->sps->min_pu_width;
1119 int x_end = FFMIN(x0 + cb_size, s->sps->width);
1120 int y_end = FFMIN(y0 + cb_size, s->sps->height);
1123 for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
1124 for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
1125 s->is_pcm[i + j * min_pu_width] = 2;
/*
 * Decode a transform tree node (HLS: transform_tree(), H.265 clause 7.3.8.8).
 * Recursively splits down to transform-unit leaves. Chroma CBFs are passed
 * down from the parent node through base_cbf_cb/base_cbf_cr; index [1] holds
 * the second chroma block of a TU in 4:2:2 material.
 */
1128 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1129 int xBase, int yBase, int cb_xBase, int cb_yBase,
1130 int log2_cb_size, int log2_trafo_size,
1131 int trafo_depth, int blk_idx,
1132 const int *base_cbf_cb, const int *base_cbf_cr)
1134 HEVCLocalContext *lc = s->HEVClc;
1135 uint8_t split_transform_flag;
/* inherit the chroma coded-block flags from the parent transform node */
1140 cbf_cb[0] = base_cbf_cb[0];
1141 cbf_cb[1] = base_cbf_cb[1];
1142 cbf_cr[0] = base_cbf_cr[0];
1143 cbf_cr[1] = base_cbf_cr[1];
/* Select the intra prediction mode(s) for this node: with intra PART_NxN
 * each depth-1 sub-block has its own luma mode (and its own chroma mode
 * only in 4:4:4); otherwise mode 0 of the PU applies. */
1145 if (lc->cu.intra_split_flag) {
1146 if (trafo_depth == 1) {
1147 lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
1148 if (s->sps->chroma_format_idc == 3) {
1149 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1150 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[blk_idx];
1152 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1153 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0];
1157 lc->tu.intra_pred_mode = lc->pu.intra_pred_mode[0];
1158 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1159 lc->tu.chroma_mode_c = lc->pu.chroma_mode_c[0];
/* split_transform_flag is explicitly coded only when the size/depth limits
 * leave an actual choice; otherwise it is inferred below. */
1162 if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1163 log2_trafo_size > s->sps->log2_min_tb_size &&
1164 trafo_depth < lc->cu.max_trafo_depth &&
1165 !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1166 split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1168 int inter_split = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1169 lc->cu.pred_mode == MODE_INTER &&
1170 lc->cu.part_mode != PART_2Nx2N &&
/* inferred value: forced split when above the max TB size, for intra NxN
 * at depth 0, or for the inter_split case above */
1173 split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1174 (lc->cu.intra_split_flag && trafo_depth == 0) ||
/* Decode the chroma CBFs (only present while the parent's CBF was set).
 * In 4:2:2, a non-split (or size-8) TU carries two vertically stacked
 * chroma blocks, hence the second flag in [1]. */
1178 if (s->sps->chroma_format_idc && (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3)) {
1179 if (trafo_depth == 0 || cbf_cb[0]) {
1180 cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1181 if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1182 cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1186 if (trafo_depth == 0 || cbf_cr[0]) {
1187 cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1188 if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1189 cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
/* recurse into the four quadrants of this node */
1194 if (split_transform_flag) {
1195 const int trafo_size_split = 1 << (log2_trafo_size - 1);
1196 const int x1 = x0 + trafo_size_split;
1197 const int y1 = y0 + trafo_size_split;
1199 #define SUBDIVIDE(x, y, idx) \
1201 ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1202 log2_trafo_size - 1, trafo_depth + 1, idx, \
1208 SUBDIVIDE(x0, y0, 0);
1209 SUBDIVIDE(x1, y0, 1);
1210 SUBDIVIDE(x0, y1, 2);
1211 SUBDIVIDE(x1, y1, 3);
/* leaf node: decode cbf_luma (when not inferred to 1) and the TU itself */
1215 int min_tu_size = 1 << s->sps->log2_min_tb_size;
1216 int log2_min_tu_size = s->sps->log2_min_tb_size;
1217 int min_tu_width = s->sps->min_tb_width;
1220 if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1221 cbf_cb[0] || cbf_cr[0] ||
1222 (s->sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1223 cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1226 ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1227 log2_cb_size, log2_trafo_size,
1228 blk_idx, cbf_luma, cbf_cb, cbf_cr);
1231 // TODO: store cbf_luma somewhere else
/* record cbf_luma for every min-TB cell of this TU (read by deblocking) */
1234 for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1235 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1236 int x_tu = (x0 + j) >> log2_min_tu_size;
1237 int y_tu = (y0 + i) >> log2_min_tu_size;
1238 s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
/* compute deblocking boundary strengths for this TU; transquant-bypass
 * CUs are additionally flagged so the filter can skip them */
1241 if (!s->sh.disable_deblocking_filter_flag) {
1242 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1243 if (s->pps->transquant_bypass_enable_flag &&
1244 lc->cu.cu_transquant_bypass_flag)
1245 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
/*
 * Decode a PCM coding block (HLS: pcm_sample(), H.265 clause 7.3.8.7):
 * raw luma/chroma samples are read straight from the bitstream, bypassing
 * prediction and transform.
 */
1251 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1253 HEVCLocalContext *lc = s->HEVClc;
1255 int cb_size = 1 << log2_cb_size;
/* destination pointers for the three planes at (x0, y0) */
1256 int stride0 = s->frame->linesize[0];
1257 uint8_t *dst0 = &s->frame->data[0][y0 * stride0 + (x0 << s->sps->pixel_shift)];
1258 int stride1 = s->frame->linesize[1];
1259 uint8_t *dst1 = &s->frame->data[1][(y0 >> s->sps->vshift[1]) * stride1 + ((x0 >> s->sps->hshift[1]) << s->sps->pixel_shift)];
1260 int stride2 = s->frame->linesize[2];
1261 uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
/* total payload size in bits: luma samples plus both subsampled chroma
 * planes, at their respective PCM bit depths */
1263 int length = cb_size * cb_size * s->sps->pcm.bit_depth +
1264 (((cb_size >> s->sps->hshift[1]) * (cb_size >> s->sps->vshift[1])) +
1265 ((cb_size >> s->sps->hshift[2]) * (cb_size >> s->sps->vshift[2]))) *
1266 s->sps->pcm.bit_depth_chroma;
/* advance the CABAC byte stream past the (byte-aligned) PCM payload */
1267 const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
1270 if (!s->sh.disable_deblocking_filter_flag)
1271 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1273 ret = init_get_bits(&gb, pcm, length);
/* copy the raw samples into the frame, plane by plane */
1277 s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size, &gb, s->sps->pcm.bit_depth);
1278 if (s->sps->chroma_format_idc) {
1279 s->hevcdsp.put_pcm(dst1, stride1,
1280 cb_size >> s->sps->hshift[1],
1281 cb_size >> s->sps->vshift[1],
1282 &gb, s->sps->pcm.bit_depth_chroma);
1283 s->hevcdsp.put_pcm(dst2, stride2,
1284 cb_size >> s->sps->hshift[2],
1285 cb_size >> s->sps->vshift[2],
1286 &gb, s->sps->pcm.bit_depth_chroma);
1293 * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1295 * @param s HEVC decoding context
1296 * @param dst target buffer for block data at block position
1297 * @param dststride stride of the dst buffer
1298 * @param ref reference picture buffer at origin (0, 0)
1299 * @param mv motion vector (relative to block position) to get pixel data from
1300 * @param x_off horizontal position of block from origin (0, 0)
1301 * @param y_off vertical position of block from origin (0, 0)
1302 * @param block_w width of block
1303 * @param block_h height of block
1304 * @param luma_weight weighting factor applied to the luma prediction
1305 * @param luma_offset additive offset applied to the luma prediction value
1308 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1309 AVFrame *ref, const Mv *mv, int x_off, int y_off,
1310 int block_w, int block_h, int luma_weight, int luma_offset)
1312 HEVCLocalContext *lc = s->HEVClc;
1313 uint8_t *src = ref->data[0];
1314 ptrdiff_t srcstride = ref->linesize[0];
1315 int pic_width = s->sps->width;
1316 int pic_height = s->sps->height;
/* explicit weighted prediction applies per slice type (P: weighted_pred,
 * B: weighted_bipred) */
1319 int weight_flag = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1320 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1321 int idx = ff_hevc_pel_weight[block_w];
/* integer part of the quarter-pel MV moves the source read position;
 * the fractional part (mx/my) selects the interpolation filter */
1323 x_off += mv->x >> 2;
1324 y_off += mv->y >> 2;
1325 src += y_off * srcstride + x_off * (1 << s->sps->pixel_shift);
/* if the block plus the interpolation margin reaches outside the picture,
 * build a padded copy in edge_emu_buffer and read from that instead */
1327 if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1328 x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1329 y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1330 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1331 int offset = QPEL_EXTRA_BEFORE * srcstride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1332 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1334 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1335 edge_emu_stride, srcstride,
1336 block_w + QPEL_EXTRA,
1337 block_h + QPEL_EXTRA,
1338 x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1339 pic_width, pic_height);
1340 src = lc->edge_emu_buffer + buf_offset;
1341 srcstride = edge_emu_stride;
/* unweighted vs explicitly weighted qpel interpolation */
1345 s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1346 block_h, mx, my, block_w);
1348 s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1349 block_h, s->sh.luma_log2_weight_denom,
1350 luma_weight, luma_offset, mx, my, block_w);
1354 * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1356 * @param s HEVC decoding context
1357 * @param dst target buffer for block data at block position
1358 * @param dststride stride of the dst buffer
1359 * @param ref0 reference picture0 buffer at origin (0, 0)
1360 * @param mv0 motion vector0 (relative to block position) to get pixel data from
1361 * @param x_off horizontal position of block from origin (0, 0)
1362 * @param y_off vertical position of block from origin (0, 0)
1363 * @param block_w width of block
1364 * @param block_h height of block
1365 * @param ref1 reference picture1 buffer at origin (0, 0)
1366 * @param mv1 motion vector1 (relative to block position) to get pixel data from
1367 * @param current_mv current motion vector structure
1369 static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1370 AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1371 int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1373 HEVCLocalContext *lc = s->HEVClc;
1374 ptrdiff_t src0stride = ref0->linesize[0];
1375 ptrdiff_t src1stride = ref1->linesize[0];
1376 int pic_width = s->sps->width;
1377 int pic_height = s->sps->height;
/* fractional parts of both quarter-pel MVs select the qpel filters */
1378 int mx0 = mv0->x & 3;
1379 int my0 = mv0->y & 3;
1380 int mx1 = mv1->x & 3;
1381 int my1 = mv1->y & 3;
1382 int weight_flag = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1383 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
/* integer parts move the read position in each reference */
1384 int x_off0 = x_off + (mv0->x >> 2);
1385 int y_off0 = y_off + (mv0->y >> 2);
1386 int x_off1 = x_off + (mv1->x >> 2);
1387 int y_off1 = y_off + (mv1->y >> 2);
1388 int idx = ff_hevc_pel_weight[block_w];
1390 uint8_t *src0 = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
1391 uint8_t *src1 = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
/* edge emulation for the L0 reference when the block plus interpolation
 * margin reaches outside the picture */
1393 if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1394 x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1395 y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1396 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1397 int offset = QPEL_EXTRA_BEFORE * src0stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1398 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1400 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1401 edge_emu_stride, src0stride,
1402 block_w + QPEL_EXTRA,
1403 block_h + QPEL_EXTRA,
1404 x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1405 pic_width, pic_height);
1406 src0 = lc->edge_emu_buffer + buf_offset;
1407 src0stride = edge_emu_stride;
/* same for the L1 reference, using the second emulation buffer so the
 * two padded copies can coexist */
1410 if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1411 x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1412 y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1413 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1414 int offset = QPEL_EXTRA_BEFORE * src1stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1415 int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1417 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1418 edge_emu_stride, src1stride,
1419 block_w + QPEL_EXTRA,
1420 block_h + QPEL_EXTRA,
1421 x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1422 pic_width, pic_height);
1423 src1 = lc->edge_emu_buffer2 + buf_offset;
1424 src1stride = edge_emu_stride;
/* interpolate the L0 prediction into lc->tmp, then combine it with the
 * L1 prediction (averaged, or explicitly weighted) into dst */
1427 s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
1428 block_h, mx0, my0, block_w);
1430 s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1431 block_h, mx1, my1, block_w);
1433 s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1434 block_h, s->sh.luma_log2_weight_denom,
1435 s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1436 s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1437 s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1438 s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1444 * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1446 * @param s HEVC decoding context
1447 * @param dst0 target buffer for block data at block position (one chroma plane)
1448 * @param dststride stride of the dst0 buffer
1449 * @param src0 source (reference picture plane) buffer at origin (0, 0)
1450 * @param srcstride stride of the source buffer
1451 * @param reflist reference picture list index (0: L0, 1: L1)
1452 * @param current_mv motion vector structure the chroma MV is taken from
1452 * @param x_off horizontal position of block from origin (0, 0)
1453 * @param y_off vertical position of block from origin (0, 0)
1454 * @param block_w width of block
1455 * @param block_h height of block
1456 * @param chroma_weight weighting factor applied to the chroma prediction
1457 * @param chroma_offset additive offset applied to the chroma prediction value
1460 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1461 ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1462 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1464 HEVCLocalContext *lc = s->HEVClc;
1465 int pic_width = s->sps->width >> s->sps->hshift[1];
1466 int pic_height = s->sps->height >> s->sps->vshift[1];
/* fixed mojibake: "&current_mv" had been corrupted to "¤t_mv" */
1467 const Mv *mv = &current_mv->mv[reflist];
1468 int weight_flag = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1469 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1470 int idx = ff_hevc_pel_weight[block_w];
1471 int hshift = s->sps->hshift[1];
1472 int vshift = s->sps->vshift[1];
/* fractional chroma MV part (subsampling adds extra fractional bits);
 * _mx/_my rescale it to the epel filter index range */
1473 intptr_t mx = av_mod_uintp2(mv->x, 2 + hshift);
1474 intptr_t my = av_mod_uintp2(mv->y, 2 + vshift);
1475 intptr_t _mx = mx << (1 - hshift);
1476 intptr_t _my = my << (1 - vshift);
/* integer part moves the source read position in chroma units */
1478 x_off += mv->x >> (2 + hshift);
1479 y_off += mv->y >> (2 + vshift);
1480 src0 += y_off * srcstride + x_off * (1 << s->sps->pixel_shift);
/* edge emulation when the block plus the epel margin leaves the picture */
1482 if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1483 x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1484 y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1485 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1486 int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->sps->pixel_shift));
1487 int buf_offset0 = EPEL_EXTRA_BEFORE *
1488 (edge_emu_stride + (1 << s->sps->pixel_shift));
1489 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1490 edge_emu_stride, srcstride,
1491 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1492 x_off - EPEL_EXTRA_BEFORE,
1493 y_off - EPEL_EXTRA_BEFORE,
1494 pic_width, pic_height);
1496 src0 = lc->edge_emu_buffer + buf_offset0;
1497 srcstride = edge_emu_stride;
/* unweighted vs explicitly weighted epel interpolation */
1500 s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1501 block_h, _mx, _my, block_w);
1503 s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1504 block_h, s->sh.chroma_log2_weight_denom,
1505 chroma_weight, chroma_offset, _mx, _my, block_w);
1509 * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1511 * @param s HEVC decoding context
1512 * @param dst target buffer for block data at block position
1513 * @param dststride stride of the dst buffer
1514 * @param ref0 reference picture0 buffer at origin (0, 0)
1515 * @param mv0 motion vector0 (relative to block position) to get pixel data from
1516 * @param x_off horizontal position of block from origin (0, 0)
1517 * @param y_off vertical position of block from origin (0, 0)
1518 * @param block_w width of block
1519 * @param block_h height of block
1520 * @param ref1 reference picture1 buffer at origin (0, 0)
1521 * @param mv1 motion vector1 (relative to block position) to get pixel data from
1522 * @param current_mv current motion vector structure
1523 * @param cidx chroma component(cb, cr)
1525 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1526 int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1528 HEVCLocalContext *lc = s->HEVClc;
1529 uint8_t *src1 = ref0->data[cidx+1];
1530 uint8_t *src2 = ref1->data[cidx+1];
1531 ptrdiff_t src1stride = ref0->linesize[cidx+1];
1532 ptrdiff_t src2stride = ref1->linesize[cidx+1];
1533 int weight_flag = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1534 (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1535 int pic_width = s->sps->width >> s->sps->hshift[1];
1536 int pic_height = s->sps->height >> s->sps->vshift[1];
1537 Mv *mv0 = ¤t_mv->mv[0];
1538 Mv *mv1 = ¤t_mv->mv[1];
1539 int hshift = s->sps->hshift[1];
1540 int vshift = s->sps->vshift[1];
1542 intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift);
1543 intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift);
1544 intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift);
1545 intptr_t my1 = av_mod_uintp2(mv1->y, 2 + vshift);
1546 intptr_t _mx0 = mx0 << (1 - hshift);
1547 intptr_t _my0 = my0 << (1 - vshift);
1548 intptr_t _mx1 = mx1 << (1 - hshift);
1549 intptr_t _my1 = my1 << (1 - vshift);
1551 int x_off0 = x_off + (mv0->x >> (2 + hshift));
1552 int y_off0 = y_off + (mv0->y >> (2 + vshift));
1553 int x_off1 = x_off + (mv1->x >> (2 + hshift));
1554 int y_off1 = y_off + (mv1->y >> (2 + vshift));
1555 int idx = ff_hevc_pel_weight[block_w];
1556 src1 += y_off0 * src1stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
1557 src2 += y_off1 * src2stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
1559 if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1560 x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1561 y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1562 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1563 int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1564 int buf_offset1 = EPEL_EXTRA_BEFORE *
1565 (edge_emu_stride + (1 << s->sps->pixel_shift));
1567 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1568 edge_emu_stride, src1stride,
1569 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1570 x_off0 - EPEL_EXTRA_BEFORE,
1571 y_off0 - EPEL_EXTRA_BEFORE,
1572 pic_width, pic_height);
1574 src1 = lc->edge_emu_buffer + buf_offset1;
1575 src1stride = edge_emu_stride;
1578 if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1579 x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1580 y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1581 const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1582 int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1583 int buf_offset1 = EPEL_EXTRA_BEFORE *
1584 (edge_emu_stride + (1 << s->sps->pixel_shift));
1586 s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1587 edge_emu_stride, src2stride,
1588 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1589 x_off1 - EPEL_EXTRA_BEFORE,
1590 y_off1 - EPEL_EXTRA_BEFORE,
1591 pic_width, pic_height);
1593 src2 = lc->edge_emu_buffer2 + buf_offset1;
1594 src2stride = edge_emu_stride;
1597 s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1598 block_h, _mx0, _my0, block_w);
1600 s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1601 src2, src2stride, lc->tmp,
1602 block_h, _mx1, _my1, block_w);
1604 s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1605 src2, src2stride, lc->tmp,
1607 s->sh.chroma_log2_weight_denom,
1608 s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1609 s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1610 s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1611 s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1612 _mx1, _my1, block_w);
1615 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1616 const Mv *mv, int y0, int height)
1618 int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1620 if (s->threads_type == FF_THREAD_FRAME )
1621 ff_thread_await_progress(&ref->tf, y, 0);
/*
 * Decode the explicit (non-merge) motion data of a prediction unit:
 * inter_pred_idc, reference indices, MVDs and MVP flags for L0/L1, then
 * derive the final motion vectors into *mv.
 */
1624 static void hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
1625 int nPbH, int log2_cb_size, int part_idx,
1626 int merge_idx, MvField *mv)
1628 HEVCLocalContext *lc = s->HEVClc;
1629 enum InterPredIdc inter_pred_idc = PRED_L0;
1632 ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
/* inter_pred_idc is only coded in B slices; P slices are always PRED_L0 */
1634 if (s->sh.slice_type == B_SLICE)
1635 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
/* list 0: ref idx, MVD, MVP candidate choice, then MV = predictor + MVD */
1637 if (inter_pred_idc != PRED_L1) {
1638 if (s->sh.nb_refs[L0])
1639 mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1641 mv->pred_flag = PF_L0;
1642 ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1643 mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1644 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1645 part_idx, merge_idx, mv, mvp_flag, 0);
1646 mv->mv[0].x += lc->pu.mvd.x;
1647 mv->mv[0].y += lc->pu.mvd.y;
/* list 1: same, except mvd_l1_zero_flag forces a zero MVD for PRED_BI */
1650 if (inter_pred_idc != PRED_L0) {
1651 if (s->sh.nb_refs[L1])
1652 mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1654 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1655 AV_ZERO32(&lc->pu.mvd);
1657 ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1660 mv->pred_flag += PF_L1;
1661 mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1662 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1663 part_idx, merge_idx, mv, mvp_flag, 1);
1664 mv->mv[1].x += lc->pu.mvd.x;
1665 mv->mv[1].y += lc->pu.mvd.y;
/*
 * Decode one prediction unit (HLS: prediction_unit(), H.265 clause 7.3.8.6):
 * derive its motion data (merge/skip or explicit AMVP), store it into the
 * per-picture motion field, and run motion compensation for luma and chroma.
 * Fixed throughout: mojibake had corrupted "&current_mv" into "¤t_mv".
 */
1669 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1671 int log2_cb_size, int partIdx, int idx)
1673 #define POS(c_idx, x, y) \
1674 &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1675 (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1676 HEVCLocalContext *lc = s->HEVClc;
1678 struct MvField current_mv = {{{ 0 }}};
1680 int min_pu_width = s->sps->min_pu_width;
1682 MvField *tab_mvf = s->ref->tab_mvf;
1683 RefPicList *refPicList = s->ref->refPicList;
1684 HEVCFrame *ref0 = NULL, *ref1 = NULL;
1685 uint8_t *dst0 = POS(0, x0, y0);
1686 uint8_t *dst1 = POS(1, x0, y0);
1687 uint8_t *dst2 = POS(2, x0, y0);
1688 int log2_min_cb_size = s->sps->log2_min_cb_size;
1689 int min_cb_width = s->sps->min_cb_width;
1690 int x_cb = x0 >> log2_min_cb_size;
1691 int y_cb = y0 >> log2_min_cb_size;
1695 int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1698 lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
/* skip/merge: pick a candidate from the merge list; otherwise decode the
 * explicit motion data (AMVP) */
1700 if (skip_flag || lc->pu.merge_flag) {
1701 if (s->sh.max_num_merge_cand > 1)
1702 merge_idx = ff_hevc_merge_idx_decode(s);
1706 ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1707 partIdx, merge_idx, &current_mv);
1709 hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1710 partIdx, merge_idx, &current_mv);
/* replicate the motion data into every min-PU cell covered by this PU */
1713 x_pu = x0 >> s->sps->log2_min_pu_size;
1714 y_pu = y0 >> s->sps->log2_min_pu_size;
1716 for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1717 for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1718 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
/* look up the reference frames and, with frame threading, wait until they
 * are decoded far enough for the MC reads below */
1720 if (current_mv.pred_flag & PF_L0) {
1721 ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1724 hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1726 if (current_mv.pred_flag & PF_L1) {
1727 ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1730 hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
/* uni-prediction from list 0 */
1733 if (current_mv.pred_flag == PF_L0) {
1734 int x0_c = x0 >> s->sps->hshift[1];
1735 int y0_c = y0 >> s->sps->vshift[1];
1736 int nPbW_c = nPbW >> s->sps->hshift[1];
1737 int nPbH_c = nPbH >> s->sps->vshift[1];
1739 luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1740 &current_mv.mv[0], x0, y0, nPbW, nPbH,
1741 s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1742 s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1744 if (s->sps->chroma_format_idc) {
1745 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1746 0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1747 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1748 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1749 0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1750 s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
/* uni-prediction from list 1 */
1752 } else if (current_mv.pred_flag == PF_L1) {
1753 int x0_c = x0 >> s->sps->hshift[1];
1754 int y0_c = y0 >> s->sps->vshift[1];
1755 int nPbW_c = nPbW >> s->sps->hshift[1];
1756 int nPbH_c = nPbH >> s->sps->vshift[1];
1758 luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1759 &current_mv.mv[1], x0, y0, nPbW, nPbH,
1760 s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1761 s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1763 if (s->sps->chroma_format_idc) {
1764 chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1765 1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1766 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1768 chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1769 1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1770 s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
/* bi-prediction from both lists */
1772 } else if (current_mv.pred_flag == PF_BI) {
1773 int x0_c = x0 >> s->sps->hshift[1];
1774 int y0_c = y0 >> s->sps->vshift[1];
1775 int nPbW_c = nPbW >> s->sps->hshift[1];
1776 int nPbH_c = nPbH >> s->sps->vshift[1];
1778 luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1779 &current_mv.mv[0], x0, y0, nPbW, nPbH,
1780 ref1->frame, &current_mv.mv[1], &current_mv);
1782 if (s->sps->chroma_format_idc) {
1783 chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1784 x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0);
1786 chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1787 x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 1);
/*
 * Derive the luma intra prediction mode of a PU (H.265 clause 8.4.2):
 * build the three most-probable-mode candidates from the left and top
 * neighbours, then resolve either mpm_idx or rem_intra_luma_pred_mode.
 * Also writes the resulting mode into tab_ipm and flags the covered
 * min-PU cells as intra in the motion field. Returns the derived mode.
 */
1795 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1796 int prev_intra_luma_pred_flag)
1798 HEVCLocalContext *lc = s->HEVClc;
1799 int x_pu = x0 >> s->sps->log2_min_pu_size;
1800 int y_pu = y0 >> s->sps->log2_min_pu_size;
1801 int min_pu_width = s->sps->min_pu_width;
1802 int size_in_pus = pu_size >> s->sps->log2_min_pu_size;
/* position inside the current CTB, used for neighbour availability */
1803 int x0b = av_mod_uintp2(x0, s->sps->log2_ctb_size);
1804 int y0b = av_mod_uintp2(y0, s->sps->log2_ctb_size);
/* neighbour modes default to INTRA_DC when the neighbour is unavailable */
1806 int cand_up = (lc->ctb_up_flag || y0b) ?
1807 s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1808 int cand_left = (lc->ctb_left_flag || x0b) ?
1809 s->tab_ipm[y_pu * min_pu_width + x_pu - 1] : INTRA_DC;
1811 int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1813 MvField *tab_mvf = s->ref->tab_mvf;
1814 int intra_pred_mode;
1818 // intra_pred_mode prediction does not cross vertical CTB boundaries
1819 if ((y0 - 1) < y_ctb)
/* build the 3-entry MPM candidate list */
1822 if (cand_left == cand_up) {
1823 if (cand_left < 2) {
/* both neighbours non-angular: use the fixed Planar/DC/Angular26 set */
1824 candidate[0] = INTRA_PLANAR;
1825 candidate[1] = INTRA_DC;
1826 candidate[2] = INTRA_ANGULAR_26;
/* both angular and equal: the mode plus its two angular neighbours
 * (wrapping within the 32 angular modes) */
1828 candidate[0] = cand_left;
1829 candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1830 candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
/* different neighbours: take both, plus the first of Planar/DC/Angular26
 * not already present */
1833 candidate[0] = cand_left;
1834 candidate[1] = cand_up;
1835 if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1836 candidate[2] = INTRA_PLANAR;
1837 } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1838 candidate[2] = INTRA_DC;
1840 candidate[2] = INTRA_ANGULAR_26;
1844 if (prev_intra_luma_pred_flag) {
/* mode is one of the MPMs, selected by mpm_idx */
1845 intra_pred_mode = candidate[lc->pu.mpm_idx];
/* otherwise: sort the candidates and remap rem_intra_luma_pred_mode past
 * the candidate values to recover the actual mode */
1847 if (candidate[0] > candidate[1])
1848 FFSWAP(uint8_t, candidate[0], candidate[1]);
1849 if (candidate[0] > candidate[2])
1850 FFSWAP(uint8_t, candidate[0], candidate[2]);
1851 if (candidate[1] > candidate[2])
1852 FFSWAP(uint8_t, candidate[1], candidate[2]);
1854 intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1855 for (i = 0; i < 3; i++)
1856 if (intra_pred_mode >= candidate[i])
1860 /* write the intra prediction units into the mv array */
1863 for (i = 0; i < size_in_pus; i++) {
1864 memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1865 intra_pred_mode, size_in_pus);
1867 for (j = 0; j < size_in_pus; j++) {
1868 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
1872 return intra_pred_mode;
1875 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1876 int log2_cb_size, int ct_depth)
1878 int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1879 int x_cb = x0 >> s->sps->log2_min_cb_size;
1880 int y_cb = y0 >> s->sps->log2_min_cb_size;
1883 for (y = 0; y < length; y++)
1884 memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
/* Remaps the derived chroma intra prediction mode when chroma_format_idc
 * is 2 (4:2:2) — used in intra_prediction_unit() below. */
1888 static const uint8_t tab_mode_idx[] = {
1889 0, 1, 2, 2, 2, 2, 3, 5, 7, 8, 10, 12, 13, 15, 17, 18, 19, 20,
1890 21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31};
/*
 * Decode the intra prediction modes of a coding unit: one luma mode per PU
 * (four PUs for PART_NxN, otherwise one), then the chroma mode(s) according
 * to chroma_format_idc.
 */
1892 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1895 HEVCLocalContext *lc = s->HEVClc;
/* chroma_mode 0..3 map to these fixed modes; 4 means "same as luma" */
1896 static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1897 uint8_t prev_intra_luma_pred_flag[4];
/* PART_NxN splits the CU into a 2x2 grid of PUs of half size */
1898 int split = lc->cu.part_mode == PART_NxN;
1899 int pb_size = (1 << log2_cb_size) >> split;
1900 int side = split + 1;
/* all prev_intra_luma_pred_flags are coded first, before any mode data */
1904 for (i = 0; i < side; i++)
1905 for (j = 0; j < side; j++)
1906 prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
/* per PU: mpm_idx or rem_intra_luma_pred_mode, then derive the luma mode */
1908 for (i = 0; i < side; i++) {
1909 for (j = 0; j < side; j++) {
1910 if (prev_intra_luma_pred_flag[2 * i + j])
1911 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1913 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1915 lc->pu.intra_pred_mode[2 * i + j] =
1916 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1917 prev_intra_luma_pred_flag[2 * i + j]);
/* 4:4:4: one chroma mode per PU; mode 34 substitutes when the table entry
 * collides with the luma mode */
1921 if (s->sps->chroma_format_idc == 3) {
1922 for (i = 0; i < side; i++) {
1923 for (j = 0; j < side; j++) {
1924 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1925 if (chroma_mode != 4) {
1926 if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
1927 lc->pu.intra_pred_mode_c[2 * i + j] = 34;
1929 lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
1931 lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
/* 4:2:2: single chroma mode, remapped through tab_mode_idx */
1935 } else if (s->sps->chroma_format_idc == 2) {
1937 lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1938 if (chroma_mode != 4) {
1939 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1942 mode_idx = intra_chroma_table[chroma_mode];
1944 mode_idx = lc->pu.intra_pred_mode[0];
1946 lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
/* 4:2:0: single chroma mode, no remapping */
1947 } else if (s->sps->chroma_format_idc != 0) {
1948 chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1949 if (chroma_mode != 4) {
1950 if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1951 lc->pu.intra_pred_mode_c[0] = 34;
1953 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
1955 lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
/* Fill the intra-mode and motion tables with defaults for a CU that carries
 * no explicit intra syntax (skip / PCM / inter): tab_ipm gets INTRA_DC, and
 * for intra CUs every covered PU is flagged PF_INTRA in tab_mvf.
 * NOTE(review): extraction gap — signature tail, braces and the j/k
 * declarations are missing from this view. */
1960 static void intra_prediction_unit_default_value(HEVCContext *s,
1964 HEVCLocalContext *lc = s->HEVClc;
1965 int pb_size = 1 << log2_cb_size;
1966 int size_in_pus = pb_size >> s->sps->log2_min_pu_size;
1967 int min_pu_width = s->sps->min_pu_width;
1968 MvField *tab_mvf = s->ref->tab_mvf;
1969 int x_pu = x0 >> s->sps->log2_min_pu_size;
1970 int y_pu = y0 >> s->sps->log2_min_pu_size;
/* CB smaller than one min PU: nothing to record */
1973 if (size_in_pus == 0)
1975 for (j = 0; j < size_in_pus; j++)
1976 memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
1977 if (lc->cu.pred_mode == MODE_INTRA)
1978 for (j = 0; j < size_in_pus; j++)
1979 for (k = 0; k < size_in_pus; k++)
1980 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
/* Decode one coding unit (spec: coding_unit()): skip flag, prediction mode,
 * partitioning, PCM, the per-partition prediction units, the residual
 * transform tree, QP bookkeeping and the CT-depth map update.
 * NOTE(review): extraction gap — braces, 'case PART_*:' labels, 'break's,
 * several 'else' lines and local declarations (x, y, ret, pcm_flag) are
 * missing from this view; comments below are hedged accordingly. */
1983 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
1985 int cb_size = 1 << log2_cb_size;
1986 HEVCLocalContext *lc = s->HEVClc;
1987 int log2_min_cb_size = s->sps->log2_min_cb_size;
1988 int length = cb_size >> log2_min_cb_size;
1989 int min_cb_width = s->sps->min_cb_width;
1990 int x_cb = x0 >> log2_min_cb_size;
1991 int y_cb = y0 >> log2_min_cb_size;
1992 int idx = log2_cb_size - 2;
/* mask selecting the QP-group-aligned positions for qPy_pred propagation */
1993 int qp_block_mask = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
/* defaults before any syntax is parsed */
1998 lc->cu.pred_mode = MODE_INTRA;
1999 lc->cu.part_mode = PART_2Nx2N;
2000 lc->cu.intra_split_flag = 0;
2002 SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2003 for (x = 0; x < 4; x++)
2004 lc->pu.intra_pred_mode[x] = 1;
2005 if (s->pps->transquant_bypass_enable_flag) {
2006 lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2007 if (lc->cu.cu_transquant_bypass_flag)
2008 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2010 lc->cu.cu_transquant_bypass_flag = 0;
/* cu_skip_flag is only present in non-I slices */
2012 if (s->sh.slice_type != I_SLICE) {
2013 uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2015 x = y_cb * min_cb_width + x_cb;
2016 for (y = 0; y < length; y++) {
2017 memset(&s->skip_flag[x], skip_flag, length);
2020 lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2022 x = y_cb * min_cb_width + x_cb;
2023 for (y = 0; y < length; y++) {
2024 memset(&s->skip_flag[x], 0, length);
/* skipped CU: merge-mode PU, no residual, default intra tables */
2029 if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2030 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2031 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2033 if (!s->sh.disable_deblocking_filter_flag)
2034 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2038 if (s->sh.slice_type != I_SLICE)
2039 lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
/* part_mode is only signalled for inter CUs or minimum-size intra CUs */
2040 if (lc->cu.pred_mode != MODE_INTRA ||
2041 log2_cb_size == s->sps->log2_min_cb_size) {
2042 lc->cu.part_mode = ff_hevc_part_mode_decode(s, log2_cb_size);
2043 lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2044 lc->cu.pred_mode == MODE_INTRA;
2047 if (lc->cu.pred_mode == MODE_INTRA) {
/* PCM is only allowed at 2Nx2N within the SPS-signalled size range */
2048 if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2049 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2050 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2051 pcm_flag = ff_hevc_pcm_flag_decode(s);
2054 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2055 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2056 if (s->sps->pcm.loop_filter_disable_flag)
2057 set_deblocking_bypass(s, x0, y0, log2_cb_size);
2062 intra_prediction_unit(s, x0, y0, log2_cb_size);
2065 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
/* inter CU: decode one PU per partition of part_mode
 * (case labels for PART_2Nx2N/2NxN/Nx2N/2NxnU/2NxnD/nLx2N/nRx2N/NxN
 * are missing from this view) */
2066 switch (lc->cu.part_mode) {
2068 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2071 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 2, log2_cb_size, 0, idx);
2072 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2075 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2076 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2079 hls_prediction_unit(s, x0, y0, cb_size, cb_size / 4, log2_cb_size, 0, idx);
2080 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2083 hls_prediction_unit(s, x0, y0, cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2084 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size / 4, log2_cb_size, 1, idx);
2087 hls_prediction_unit(s, x0, y0, cb_size / 4, cb_size, log2_cb_size, 0, idx - 2);
2088 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2091 hls_prediction_unit(s, x0, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2092 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size / 4, cb_size, log2_cb_size, 1, idx - 2);
2095 hls_prediction_unit(s, x0, y0, cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2096 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2097 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2098 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
/* residual: rqt_root_cbf may suppress the whole transform tree for
 * non-merge inter CUs */
2104 int rqt_root_cbf = 1;
2106 if (lc->cu.pred_mode != MODE_INTRA &&
2107 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2108 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2111 const static int cbf[2] = { 0 };
2112 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2113 s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2114 s->sps->max_transform_hierarchy_depth_inter;
2115 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2117 log2_cb_size, 0, 0, cbf, cbf);
2121 if (!s->sh.disable_deblocking_filter_flag)
2122 ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
/* if no cu_qp_delta was coded in this QP group, derive qPy now */
2127 if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2128 ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
2130 x = y_cb * min_cb_width + x_cb;
2131 for (y = 0; y < length; y++) {
2132 memset(&s->qp_y_tab[x], lc->qp_y, length);
/* propagate the QP predictor when this CU closes a QP group */
2136 if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2137 ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2138 lc->qPy_pred = lc->qp_y;
2141 set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
/* Recursively decode the coding quadtree (spec: coding_quadtree()): read
 * split_cu_flag where the CU fits inside the picture, recurse into the four
 * sub-CUs (clipped at picture borders), otherwise decode a coding unit and
 * check end_of_slice_segment_flag at CTB boundaries.
 * Returns >0 while more CTB data follows, 0 at end of slice, <0 on error.
 * NOTE(review): extraction gap — braces, 'else' lines, 'int split_cu;',
 * 'int ret;' and some error-return lines are missing from this view. */
2146 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2147 int log2_cb_size, int cb_depth)
2149 HEVCLocalContext *lc = s->HEVClc;
2150 const int cb_size = 1 << log2_cb_size;
2154 lc->ct_depth = cb_depth;
/* split_cu_flag only coded when the CU lies fully inside the picture */
2155 if (x0 + cb_size <= s->sps->width &&
2156 y0 + cb_size <= s->sps->height &&
2157 log2_cb_size > s->sps->log2_min_cb_size) {
2158 split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
/* implicit split at picture borders above the minimum CB size */
2160 split_cu = (log2_cb_size > s->sps->log2_min_cb_size);
/* new QP group: reset cu_qp_delta state */
2162 if (s->pps->cu_qp_delta_enabled_flag &&
2163 log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2164 lc->tu.is_cu_qp_delta_coded = 0;
2165 lc->tu.cu_qp_delta = 0;
2168 if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2169 log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_chroma_qp_offset_depth) {
2170 lc->tu.is_cu_chroma_qp_offset_coded = 0;
/* split: recurse into up to four quadrants, skipping those outside the
 * picture */
2174 int qp_block_mask = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
2175 const int cb_size_split = cb_size >> 1;
2176 const int x1 = x0 + cb_size_split;
2177 const int y1 = y0 + cb_size_split;
2181 more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2185 if (more_data && x1 < s->sps->width) {
2186 more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2190 if (more_data && y1 < s->sps->height) {
2191 more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2195 if (more_data && x1 < s->sps->width &&
2196 y1 < s->sps->height) {
2197 more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
2202 if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2203 ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2204 lc->qPy_pred = lc->qp_y;
2207 return ((x1 + cb_size_split) < s->sps->width ||
2208 (y1 + cb_size_split) < s->sps->height);
/* leaf: decode the CU, then test for end of slice at CTB-aligned corners */
2212 ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2215 if ((!((x0 + cb_size) %
2216 (1 << (s->sps->log2_ctb_size))) ||
2217 (x0 + cb_size >= s->sps->width)) &&
2219 (1 << (s->sps->log2_ctb_size))) ||
2220 (y0 + cb_size >= s->sps->height))) {
2221 int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2222 return !end_of_slice_flag;
/* Set up per-CTB neighbour availability before decoding the CTB at
 * (x_ctb, y_ctb): tile/slice boundary flags, end-of-tile coordinates, and
 * the left/up/up-left/up-right availability used by CABAC context
 * derivation and intra prediction.
 * NOTE(review): extraction gap — braces and some 'else' lines are missing
 * from this view. */
2231 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2234 HEVCLocalContext *lc = s->HEVClc;
2235 int ctb_size = 1 << s->sps->log2_ctb_size;
2236 int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2237 int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
/* record which slice owns this CTB (used for cross-slice checks below) */
2239 s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2241 if (s->pps->entropy_coding_sync_enabled_flag) {
/* WPP: each CTB row restarts QP prediction */
2242 if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2243 lc->first_qp_group = 1;
2244 lc->end_of_tiles_x = s->sps->width;
2245 } else if (s->pps->tiles_enabled_flag) {
/* entering a new tile: recompute its right edge, reset QP group */
2246 if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2247 int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2248 lc->end_of_tiles_x = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2249 lc->first_qp_group = 1;
2252 lc->end_of_tiles_x = s->sps->width;
2255 lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2257 lc->boundary_flags = 0;
2258 if (s->pps->tiles_enabled_flag) {
2259 if (x_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2260 lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2261 if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2262 lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2263 if (y_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]])
2264 lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2265 if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width])
2266 lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
/* no tiles: boundaries follow the slice-relative CTB address only */
2268 if (ctb_addr_in_slice <= 0)
2269 lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2270 if (ctb_addr_in_slice < s->sps->ctb_width)
2271 lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2274 lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2275 lc->ctb_up_flag = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2276 lc->ctb_up_right_flag = ((y_ctb > 0) && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2277 lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0) && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
/* Single-threaded slice-data decode loop: walks CTBs in tile-scan order,
 * initializing CABAC and SAO per CTB, decoding each coding quadtree and
 * running the in-loop filters. Executed via avctx->execute().
 * NOTE(review): extraction gap — braces, 'int more_data = 1;',
 * 'int x_ctb/y_ctb' declarations, ctb_addr_ts increment and the final
 * return are missing from this view. */
2280 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2282 HEVCContext *s = avctxt->priv_data;
2283 int ctb_size = 1 << s->sps->log2_ctb_size;
2287 int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
/* a dependent slice segment cannot be the first segment of the picture */
2289 if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2290 av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2291 return AVERROR_INVALIDDATA;
2294 if (s->sh.dependent_slice_segment_flag) {
2295 int prev_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2296 if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2297 av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2298 return AVERROR_INVALIDDATA;
2302 while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2303 int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2305 x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2306 y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2307 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2309 ff_hevc_cabac_init(s, ctb_addr_ts);
2311 hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2313 s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2314 s->deblock[ctb_addr_rs].tc_offset = s->sh.tc_offset;
2315 s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag;
2317 more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2318 if (more_data < 0) {
/* mark the CTB as unowned so later error concealment can detect it */
2319 s->tab_slice_address[ctb_addr_rs] = -1;
2325 ff_hevc_save_states(s, ctb_addr_ts);
2326 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
/* last CTB of the picture: run the deferred filter pass */
2329 if (x_ctb + ctb_size >= s->sps->width &&
2330 y_ctb + ctb_size >= s->sps->height)
2331 ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
/* Run hls_decode_entry() through avctx->execute() (single job) and return
 * its result.
 * NOTE(review): extraction gap — the arg/ret setup and return statement are
 * missing from this view. */
2336 static int hls_slice_data(HEVCContext *s)
2344 s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
/* Wavefront (WPP) worker: decodes one CTB row using the per-thread context
 * s1->sList[self_id], synchronizing with the row above through
 * ff_thread_await_progress2()/ff_thread_report_progress2() and aborting all
 * rows via the shared wpp_err atomic on error.
 * NOTE(review): extraction gap — braces, 'int more_data = 1;', 'int ret;',
 * ctb_addr_ts increment and several return statements are missing from this
 * view. */
2347 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2349 HEVCContext *s1 = avctxt->priv_data, *s;
2350 HEVCLocalContext *lc;
2351 int ctb_size = 1<< s1->sps->log2_ctb_size;
2353 int *ctb_row_p = input_ctb_row;
2354 int ctb_row = ctb_row_p[job];
2355 int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->sps->width + ctb_size - 1) >> s1->sps->log2_ctb_size);
2356 int ctb_addr_ts = s1->pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2357 int thread = ctb_row % s1->threads_number;
2360 s = s1->sList[self_id];
/* rows after the first read their bitstream segment from the entry-point
 * offsets parsed in hls_slice_data_wpp() */
2364 ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2368 ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2371 while(more_data && ctb_addr_ts < s->sps->ctb_size) {
2372 int x_ctb = (ctb_addr_rs % s->sps->ctb_width) << s->sps->log2_ctb_size;
2373 int y_ctb = (ctb_addr_rs / s->sps->ctb_width) << s->sps->log2_ctb_size;
2375 hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
/* wait until the row above is SHIFT_CTB_WPP CTBs ahead */
2377 ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
2379 if (avpriv_atomic_int_get(&s1->wpp_err)){
2380 ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2384 ff_hevc_cabac_init(s, ctb_addr_ts);
2385 hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2386 more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2388 if (more_data < 0) {
2389 s->tab_slice_address[ctb_addr_rs] = -1;
2395 ff_hevc_save_states(s, ctb_addr_ts);
2396 ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2397 ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
/* premature end of row data: flag the error for all workers */
2399 if (!more_data && (x_ctb+ctb_size) < s->sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2400 avpriv_atomic_int_set(&s1->wpp_err, 1);
2401 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2405 if ((x_ctb+ctb_size) >= s->sps->width && (y_ctb+ctb_size) >= s->sps->height ) {
2406 ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2407 ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2410 ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2413 if(x_ctb >= s->sps->width) {
2417 ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
/* Multi-threaded (WPP) slice-data entry: clones the decoder context per
 * thread, converts the slice-header entry-point offsets into per-row
 * byte offsets/sizes (compensating for emulation-prevention bytes removed
 * in ff_hevc_extract_rbsp via skipped_bytes_pos), then runs
 * hls_decode_entry_wpp over all rows with execute2().
 * NOTE(review): extraction gap — braces, NULL checks after the
 * allocations, 'int offset/i/j/res' declarations, the arg[i] fill loop and
 * the final result accumulation/free are missing from this view. */
2422 static int hls_slice_data_wpp(HEVCContext *s, const uint8_t *nal, int length)
2424 HEVCLocalContext *lc = s->HEVClc;
2425 int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2426 int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2428 int startheader, cmpt = 0;
2434 return AVERROR(ENOMEM);
2439 ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
/* one full HEVCContext clone per worker thread
 * NOTE(review): allocation results are not checked here in this view */
2442 for (i = 1; i < s->threads_number; i++) {
2443 s->sList[i] = av_malloc(sizeof(HEVCContext));
2444 memcpy(s->sList[i], s, sizeof(HEVCContext));
2445 s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2446 s->sList[i]->HEVClc = s->HEVClcList[i];
2450 offset = (lc->gb.index >> 3);
/* count emulation-prevention bytes stripped before the first entry point */
2452 for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < s->skipped_bytes; j++) {
2453 if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2459 for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2460 offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2461 for (j = 0, cmpt = 0, startheader = offset
2462 + s->sh.entry_point_offset[i]; j < s->skipped_bytes; j++) {
2463 if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2468 s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2469 s->sh.offset[i - 1] = offset;
/* last segment runs to the end of the NAL payload */
2472 if (s->sh.num_entry_point_offsets != 0) {
2473 offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2474 s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2475 s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
/* refresh the clones with the slice-header state parsed since cloning */
2480 for (i = 1; i < s->threads_number; i++) {
2481 s->sList[i]->HEVClc->first_qp_group = 1;
2482 s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2483 memcpy(s->sList[i], s, sizeof(HEVCContext));
2484 s->sList[i]->HEVClc = s->HEVClcList[i];
2487 avpriv_atomic_int_set(&s->wpp_err, 0);
2488 ff_reset_entries(s->avctx);
2490 for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2495 if (s->pps->entropy_coding_sync_enabled_flag)
2496 s->avctx->execute2(s->avctx, (void *) hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2498 for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
/* Parse the 2-byte NAL unit header (spec: nal_unit_header()):
 * forbidden_zero_bit, nal_unit_type, nuh_layer_id, nuh_temporal_id_plus1. */
2506 * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2507 * 0 if the unit should be skipped, 1 otherwise
2509 static int hls_nal_unit(HEVCContext *s)
2511 GetBitContext *gb = &s->HEVClc->gb;
/* forbidden_zero_bit must be 0 */
2514 if (get_bits1(gb) != 0)
2515 return AVERROR_INVALIDDATA;
2517 s->nal_unit_type = get_bits(gb, 6);
2519 nuh_layer_id = get_bits(gb, 6);
/* temporal_id = nuh_temporal_id_plus1 - 1; plus1 == 0 is invalid */
2520 s->temporal_id = get_bits(gb, 3) - 1;
2521 if (s->temporal_id < 0)
2522 return AVERROR_INVALIDDATA;
2524 av_log(s->avctx, AV_LOG_DEBUG,
2525 "nal_unit_type: %d, nuh_layer_id: %d, temporal_id: %d\n",
2526 s->nal_unit_type, nuh_layer_id, s->temporal_id);
/* only the base layer (nuh_layer_id == 0) is decoded here */
2528 return nuh_layer_id == 0;
/* Attach SEI-derived side data to the output frame: stereo 3D packing
 * (frame-packing-arrangement SEI) and display orientation
 * (rotation/flip SEI).
 * NOTE(review): extraction gap — braces, 'case' labels of the switch and
 * NULL-check lines are missing from this view. */
2531 static int set_side_data(HEVCContext *s)
2533 AVFrame *out = s->ref->frame;
/* arrangement types 3..5 (side-by-side / top-bottom / frame-sequence)
 * with a meaningful interpretation are exported */
2535 if (s->sei_frame_packing_present &&
2536 s->frame_packing_arrangement_type >= 3 &&
2537 s->frame_packing_arrangement_type <= 5 &&
2538 s->content_interpretation_type > 0 &&
2539 s->content_interpretation_type < 3) {
2540 AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2542 return AVERROR(ENOMEM);
2544 switch (s->frame_packing_arrangement_type) {
2546 if (s->quincunx_subsampling)
2547 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2549 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2552 stereo->type = AV_STEREO3D_TOPBOTTOM;
2555 stereo->type = AV_STEREO3D_FRAMESEQUENCE;
/* interpretation 2 = right view first */
2559 if (s->content_interpretation_type == 2)
2560 stereo->flags = AV_STEREO3D_FLAG_INVERT;
2563 if (s->sei_display_orientation_present &&
2564 (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
/* SEI rotation is in 1/65536-degree units */
2565 double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2566 AVFrameSideData *rotation = av_frame_new_side_data(out,
2567 AV_FRAME_DATA_DISPLAYMATRIX,
2568 sizeof(int32_t) * 9);
2570 return AVERROR(ENOMEM);
2572 av_display_rotation_set((int32_t *)rotation->data, angle);
2573 av_display_matrix_flip((int32_t *)rotation->data,
2574 s->sei_hflip, s->sei_vflip);
/* Begin decoding a new picture: clear per-frame maps, allocate the new
 * reference frame, build the RPS, attach side data, and output any frame
 * that becomes ready. On failure jumps to the (not visible here) fail path
 * that unrefs s->ref.
 * NOTE(review): extraction gap — braces, 'int ret;', goto fail lines and
 * the return are missing from this view. */
2580 static int hevc_frame_start(HEVCContext *s)
2582 HEVCLocalContext *lc = s->HEVClc;
2583 int pic_size_in_ctb = ((s->sps->width >> s->sps->log2_min_cb_size) + 1) *
2584 ((s->sps->height >> s->sps->log2_min_cb_size) + 1);
/* reset per-frame deblocking/PCM/slice-ownership maps */
2587 memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2588 memset(s->vertical_bs, 0, s->bs_width * s->bs_height);
2589 memset(s->cbf_luma, 0, s->sps->min_tb_width * s->sps->min_tb_height);
2590 memset(s->is_pcm, 0, (s->sps->min_pu_width + 1) * (s->sps->min_pu_height + 1));
2591 memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2594 s->first_nal_type = s->nal_unit_type;
2596 if (s->pps->tiles_enabled_flag)
2597 lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2599 ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2603 ret = ff_hevc_frame_rps(s);
2605 av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2609 s->ref->frame->key_frame = IS_IRAP(s);
2611 ret = set_side_data(s);
/* map HEVC slice type (B=0,P=1,I=2) onto AVPictureType */
2615 s->frame->pict_type = 3 - s->sh.slice_type;
2618 ff_hevc_bump_frame(s);
2620 av_frame_unref(s->output_frame);
2621 ret = ff_hevc_output_frame(s, s->output_frame, 0);
/* frame-threading: setup done unless a hwaccel finishes it later */
2625 if (!s->avctx->hwaccel)
2626 ff_thread_finish_setup(s->avctx);
2632 ff_hevc_unref_frame(s, s->ref, ~0);
/* Dispatch one extracted NAL unit: parameter sets and SEI go to their
 * parsers; VCL (slice) NAL types parse the slice header, start a frame on
 * the first slice, build reference lists and decode the slice data
 * (single-threaded or WPP).
 * NOTE(review): extraction gap — braces, most 'case NAL_*' labels,
 * 'break's, goto fail lines and the trailing return are missing from this
 * view. */
2637 static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
2639 HEVCLocalContext *lc = s->HEVClc;
2640 GetBitContext *gb = &lc->gb;
2641 int ctb_addr_ts, ret;
2643 ret = init_get_bits8(gb, nal->data, nal->size);
/* hls_nal_unit() returns 0 for NALs to skip (non-base layer) */
2647 ret = hls_nal_unit(s);
2649 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2655 switch (s->nal_unit_type) {
2657 ret = ff_hevc_decode_nal_vps(s);
2662 ret = ff_hevc_decode_nal_sps(s);
2667 ret = ff_hevc_decode_nal_pps(s);
2671 case NAL_SEI_PREFIX:
2672 case NAL_SEI_SUFFIX:
2673 ret = ff_hevc_decode_nal_sei(s);
2684 case NAL_BLA_W_RADL:
2686 case NAL_IDR_W_RADL:
2693 ret = hls_slice_header(s);
/* first random-access point after a seek establishes max_ra */
2697 if (s->max_ra == INT_MAX) {
2698 if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2702 s->max_ra = INT_MIN;
/* drop RASL pictures that depend on references before the RAP */
2706 if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2707 s->poc <= s->max_ra) {
2711 if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2712 s->max_ra = INT_MIN;
2715 if (s->sh.first_slice_in_pic_flag) {
2716 ret = hevc_frame_start(s);
2719 } else if (!s->ref) {
2720 av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
/* all VCL NALs of one picture must share the same type */
2724 if (s->nal_unit_type != s->first_nal_type) {
2725 av_log(s->avctx, AV_LOG_ERROR,
2726 "Non-matching NAL types of the VCL NALUs: %d %d\n",
2727 s->first_nal_type, s->nal_unit_type);
2728 return AVERROR_INVALIDDATA;
2731 if (!s->sh.dependent_slice_segment_flag &&
2732 s->sh.slice_type != I_SLICE) {
2733 ret = ff_hevc_slice_rpl(s);
2735 av_log(s->avctx, AV_LOG_WARNING,
2736 "Error constructing the reference lists for the current slice.\n");
2741 if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2742 ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2747 if (s->avctx->hwaccel) {
2748 ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
/* WPP only when threading is on and entry points exist */
2752 if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2753 ctb_addr_ts = hls_slice_data_wpp(s, nal->data, nal->size);
2755 ctb_addr_ts = hls_slice_data(s);
2756 if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2760 if (ctb_addr_ts < 0) {
/* EOS/EOB: bump sequence counter, force re-sync of RAP state */
2768 s->seq_decode = (s->seq_decode + 1) & 0xff;
2769 s->max_ra = INT_MAX;
2775 av_log(s->avctx, AV_LOG_INFO,
2776 "Skipping NAL unit %d\n", s->nal_unit_type);
2781 if (s->avctx->err_recognition & AV_EF_EXPLODE)
2786 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
2787 * between these functions would be nice. */
/* Extract the RBSP from an Annex-B NAL: strip 00 00 03 emulation-prevention
 * bytes into nal->rbsp_buffer, recording each removal position in
 * s->skipped_bytes_pos (needed by WPP to fix up entry-point offsets), and
 * stop at the next start code. Fast SIMD-ish zero scans are used to find
 * the first candidate escape.
 * NOTE(review): extraction gap — signature tail, braces, the
 * si/di/dst declarations and parts of the STARTCODE_TEST/FIND_FIRST_ZERO
 * macros are missing from this view. */
2788 int ff_hevc_extract_rbsp(HEVCContext *s, const uint8_t *src, int length,
2794 s->skipped_bytes = 0;
2795 #define STARTCODE_TEST \
2796 if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) { \
2797 if (src[i + 2] != 3) { \
2798 /* startcode, so we must be past the end */ \
2803 #if HAVE_FAST_UNALIGNED
2804 #define FIND_FIRST_ZERO \
2805 if (i > 0 && !src[i]) \
/* scan 8 bytes at a time for a zero byte (classic haszero bit trick) */
2810 for (i = 0; i + 1 < length; i += 9) {
2811 if (!((~AV_RN64A(src + i) &
2812 (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
2813 0x8000800080008080ULL))
/* 32-bit variant of the same zero-byte scan */
2820 for (i = 0; i + 1 < length; i += 5) {
2821 if (!((~AV_RN32A(src + i) &
2822 (AV_RN32A(src + i) - 0x01000101U)) &
2829 #endif /* HAVE_FAST_64BIT */
2831 for (i = 0; i + 1 < length; i += 2) {
2834 if (i > 0 && src[i - 1] == 0)
2838 #endif /* HAVE_FAST_UNALIGNED */
/* no escapes found: the NAL can be used in place, no copy needed */
2840 if (i >= length - 1) { // no escaped 0
2842 nal->raw_data = src;
2844 nal->raw_size = length;
2848 av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
2849 length + FF_INPUT_BUFFER_PADDING_SIZE);
2850 if (!nal->rbsp_buffer)
2851 return AVERROR(ENOMEM);
2853 dst = nal->rbsp_buffer;
/* bytes before the first candidate escape are copied verbatim */
2855 memcpy(dst, src, i);
2857 while (si + 2 < length) {
2858 // remove escapes (very rare 1:2^22)
2859 if (src[si + 2] > 3) {
2860 dst[di++] = src[si++];
2861 dst[di++] = src[si++];
2862 } else if (src[si] == 0 && src[si + 1] == 0) {
2863 if (src[si + 2] == 3) { // escape
/* grow the skipped-byte position array on demand */
2869 if (s->skipped_bytes_pos_size < s->skipped_bytes) {
2870 s->skipped_bytes_pos_size *= 2;
2871 av_reallocp_array(&s->skipped_bytes_pos,
2872 s->skipped_bytes_pos_size,
2873 sizeof(*s->skipped_bytes_pos));
2874 if (!s->skipped_bytes_pos)
2875 return AVERROR(ENOMEM);
2877 if (s->skipped_bytes_pos)
2878 s->skipped_bytes_pos[s->skipped_bytes-1] = di - 1;
2880 } else // next start code
2884 dst[di++] = src[si++];
2887 dst[di++] = src[si++];
2890 memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
2894 nal->raw_data = src;
/* Split an input packet into NAL units (length-prefixed or Annex-B start
 * codes), extract each RBSP with per-NAL skipped-byte bookkeeping, then
 * decode all units in order and finally report frame-thread progress.
 * NOTE(review): extraction gap — braces, goto fail lines, some loop
 * bodies (buf/length advancement) and the return statement are missing
 * from this view. */
2899 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2901 int i, consumed, ret = 0;
2904 s->last_eos = s->eos;
2907 /* split the input packet into NAL units, so we know the upper bound on the
2908 * number of slices in the frame */
2910 while (length >= 4) {
2912 int extract_length = 0;
/* length-prefixed (hvcC-style) input: read nal_length_size bytes */
2916 for (i = 0; i < s->nal_length_size; i++)
2917 extract_length = (extract_length << 8) | buf[i];
2918 buf += s->nal_length_size;
2919 length -= s->nal_length_size;
2921 if (extract_length > length) {
2922 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2923 ret = AVERROR_INVALIDDATA;
2927 /* search start code */
2928 while (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2932 av_log(s->avctx, AV_LOG_ERROR, "No start code is found.\n");
2933 ret = AVERROR_INVALIDDATA;
2943 extract_length = length;
/* grow the NAL array and the parallel skipped-bytes arrays together */
2945 if (s->nals_allocated < s->nb_nals + 1) {
2946 int new_size = s->nals_allocated + 1;
2947 void *tmp = av_realloc_array(s->nals, new_size, sizeof(*s->nals));
2948 ret = AVERROR(ENOMEM);
2953 memset(s->nals + s->nals_allocated, 0,
2954 (new_size - s->nals_allocated) * sizeof(*s->nals));
2956 tmp = av_realloc_array(s->skipped_bytes_nal, new_size, sizeof(*s->skipped_bytes_nal));
2959 s->skipped_bytes_nal = tmp;
2961 tmp = av_realloc_array(s->skipped_bytes_pos_size_nal, new_size, sizeof(*s->skipped_bytes_pos_size_nal));
2964 s->skipped_bytes_pos_size_nal = tmp;
2966 tmp = av_realloc_array(s->skipped_bytes_pos_nal, new_size, sizeof(*s->skipped_bytes_pos_nal));
2969 s->skipped_bytes_pos_nal = tmp;
2971 s->skipped_bytes_pos_size_nal[s->nals_allocated] = 1024; // initial buffer size
2972 s->skipped_bytes_pos_nal[s->nals_allocated] = av_malloc_array(s->skipped_bytes_pos_size_nal[s->nals_allocated], sizeof(*s->skipped_bytes_pos));
2973 if (!s->skipped_bytes_pos_nal[s->nals_allocated])
2975 s->nals_allocated = new_size;
2977 s->skipped_bytes_pos_size = s->skipped_bytes_pos_size_nal[s->nb_nals];
2978 s->skipped_bytes_pos = s->skipped_bytes_pos_nal[s->nb_nals];
2979 nal = &s->nals[s->nb_nals];
2981 consumed = ff_hevc_extract_rbsp(s, buf, extract_length, nal);
/* save back the (possibly reallocated) per-NAL skipped-bytes state */
2983 s->skipped_bytes_nal[s->nb_nals] = s->skipped_bytes;
2984 s->skipped_bytes_pos_size_nal[s->nb_nals] = s->skipped_bytes_pos_size;
2985 s->skipped_bytes_pos_nal[s->nb_nals++] = s->skipped_bytes_pos;
2993 ret = init_get_bits8(&s->HEVClc->gb, nal->data, nal->size);
/* EOS/EOB in this packet marks the end of the coded sequence */
2998 if (s->nal_unit_type == NAL_EOB_NUT ||
2999 s->nal_unit_type == NAL_EOS_NUT)
3006 /* parse the NAL units */
3007 for (i = 0; i < s->nb_nals; i++) {
3008 s->skipped_bytes = s->skipped_bytes_nal[i];
3009 s->skipped_bytes_pos = s->skipped_bytes_pos_nal[i];
3011 ret = decode_nal_unit(s, &s->nals[i]);
3013 av_log(s->avctx, AV_LOG_WARNING,
3014 "Error parsing NAL unit #%d.\n", i);
3020 if (s->ref && s->threads_type == FF_THREAD_FRAME)
3021 ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
/* Log a 16-byte MD5 digest as 32 lowercase hex characters at the given
 * log level (no trailing newline). */
3026 static void print_md5(void *log_ctx, int level, uint8_t md5[16])
3029 for (i = 0; i < 16; i++)
3030 av_log(log_ctx, level, "%02"PRIx8, md5[i]);
/* Verify the decoded frame against the per-plane MD5 checksums from the
 * picture-hash SEI (s->md5). For >8-bit formats the plane rows are
 * byteswapped into checksum_buf first, since the SEI hashes little-endian
 * samples. Returns 0 on match, AVERROR_INVALIDDATA on mismatch.
 * NOTE(review): extraction gap — braces, the 'int i/j' and 'uint8_t md5[16]'
 * declarations and some early-return lines are missing from this view. */
3033 static int verify_md5(HEVCContext *s, AVFrame *frame)
3035 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
3040 return AVERROR(EINVAL);
3042 pixel_shift = desc->comp[0].depth_minus1 > 7;
3044 av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
3047 /* the checksums are LE, so we have to byteswap for >8bpp formats
3050 if (pixel_shift && !s->checksum_buf) {
3051 av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
3052 FFMAX3(frame->linesize[0], frame->linesize[1],
3053 frame->linesize[2]));
3054 if (!s->checksum_buf)
3055 return AVERROR(ENOMEM);
3059 for (i = 0; frame->data[i]; i++) {
3060 int width = s->avctx->coded_width;
3061 int height = s->avctx->coded_height;
/* chroma planes (1 and 2) use the subsampled dimensions */
3062 int w = (i == 1 || i == 2) ? (width >> desc->log2_chroma_w) : width;
3063 int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
3066 av_md5_init(s->md5_ctx);
3067 for (j = 0; j < h; j++) {
3068 const uint8_t *src = frame->data[i] + j * frame->linesize[i];
3071 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
3072 (const uint16_t *) src, w);
3073 src = s->checksum_buf;
3076 av_md5_update(s->md5_ctx, src, w << pixel_shift);
3078 av_md5_final(s->md5_ctx, md5);
3080 if (!memcmp(md5, s->md5[i], 16)) {
3081 av_log (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
3082 print_md5(s->avctx, AV_LOG_DEBUG, md5);
3083 av_log (s->avctx, AV_LOG_DEBUG, "; ");
3085 av_log (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
3086 print_md5(s->avctx, AV_LOG_ERROR, md5);
3087 av_log (s->avctx, AV_LOG_ERROR, " != ");
3088 print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
3089 av_log (s->avctx, AV_LOG_ERROR, "\n");
3090 return AVERROR_INVALIDDATA;
3094 av_log(s->avctx, AV_LOG_DEBUG, "\n");
/* AVCodec.decode entry point: flush (empty packet → output buffered
 * frames) or decode the packet's NAL units, run hwaccel end_frame, verify
 * the SEI MD5 checksum if requested, and return a ready output frame.
 * NOTE(review): extraction gap — braces, the flush path's got_output
 * handling, 'int ret;' and return statements are missing from this view. */
3099 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
3103 HEVCContext *s = avctx->priv_data;
/* flush: drain the DPB */
3106 ret = ff_hevc_output_frame(s, data, 1);
3115 ret = decode_nal_units(s, avpkt->data, avpkt->size);
3119 if (avctx->hwaccel) {
3120 if (s->ref && avctx->hwaccel->end_frame(avctx) < 0)
3121 av_log(avctx, AV_LOG_ERROR,
3122 "hardware accelerator failed to decode picture\n");
3124 /* verify the SEI checksum */
3125 if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
3127 ret = verify_md5(s, s->ref->frame);
3128 if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
3129 ff_hevc_unref_frame(s, s->ref, ~0);
3136 if (s->is_decoded) {
3137 av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
3141 if (s->output_frame->buf[0]) {
3142 av_frame_move_ref(data, s->output_frame);
/* Make dst a new reference to src: take refs on the underlying frame and on
 * every side-data buffer (motion vectors, RPL tables, hwaccel private data),
 * then copy the plain scalar fields.  Any allocation failure falls through to
 * the shared error label (elided in this listing) which unrefs everything and
 * returns ENOMEM. */
3149 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
/* Thread-aware ref on the picture buffers themselves. */
3153 ret = ff_thread_ref_frame(&dst->tf, &src->tf);
/* Per-frame motion-vector table: ref the buffer, share the data pointer. */
3157 dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
3158 if (!dst->tab_mvf_buf)
3160 dst->tab_mvf = src->tab_mvf;
/* Per-CTB reference-picture-list table. */
3162 dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
3163 if (!dst->rpl_tab_buf)
3165 dst->rpl_tab = src->rpl_tab;
3167 dst->rpl_buf = av_buffer_ref(src->rpl_buf);
/* Scalar metadata is copied by value. */
3171 dst->poc = src->poc;
3172 dst->ctb_count = src->ctb_count;
3173 dst->window = src->window;
3174 dst->flags = src->flags;
3175 dst->sequence = src->sequence;
/* Hardware-accelerator private data, if the source frame carries any. */
3177 if (src->hwaccel_picture_private) {
3178 dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
3179 if (!dst->hwaccel_priv_buf)
3181 dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
/* fail: undo every ref taken so far before reporting out-of-memory. */
3186 ff_hevc_unref_frame(s, dst, ~0);
3187 return AVERROR(ENOMEM);
/* Free everything owned by the HEVCContext.  Also used as the error-cleanup
 * path of hevc_init_context(), so every free must tolerate fields that were
 * never allocated (av_freep/av_buffer_unref are NULL-safe). */
3190 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3192 HEVCContext *s = avctx->priv_data;
3197 av_freep(&s->md5_ctx);
/* Per-NAL emulation-prevention bookkeeping arrays. */
3199 for(i=0; i < s->nals_allocated; i++) {
3200 av_freep(&s->skipped_bytes_pos_nal[i]);
3202 av_freep(&s->skipped_bytes_pos_size_nal);
3203 av_freep(&s->skipped_bytes_nal);
3204 av_freep(&s->skipped_bytes_pos_nal);
3206 av_freep(&s->cabac_state);
/* SAO edge-pixel line buffers, one pair per plane. */
3208 for (i = 0; i < 3; i++) {
3209 av_freep(&s->sao_pixel_buffer_h[i]);
3210 av_freep(&s->sao_pixel_buffer_v[i]);
3212 av_frame_free(&s->output_frame);
/* Release every DPB slot: drop all references, then free the AVFrame. */
3214 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3215 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3216 av_frame_free(&s->DPB[i].frame);
/* Parameter-set buffers (VPS/SPS/PPS) are ref-counted AVBuffers. */
3219 for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
3220 av_buffer_unref(&s->vps_list[i]);
3221 for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
3222 av_buffer_unref(&s->sps_list[i]);
3223 for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
3224 av_buffer_unref(&s->pps_list[i]);
/* Slice-header scratch arrays. */
3229 av_freep(&s->sh.entry_point_offset);
3230 av_freep(&s->sh.offset);
3231 av_freep(&s->sh.size);
/* Per-thread local contexts; index 0 is handled separately below because
 * s->HEVClc may alias s->HEVClcList[0]. */
3233 for (i = 1; i < s->threads_number; i++) {
3234 HEVCLocalContext *lc = s->HEVClcList[i];
3236 av_freep(&s->HEVClcList[i]);
3237 av_freep(&s->sList[i]);
3240 if (s->HEVClc == s->HEVClcList[0])
3242 av_freep(&s->HEVClcList[0]);
/* RBSP buffers of the parsed-NAL array; reset the count so a later
 * re-init starts from a clean state. */
3244 for (i = 0; i < s->nals_allocated; i++)
3245 av_freep(&s->nals[i].rbsp_buffer);
3247 s->nals_allocated = 0;
/* Allocate the per-context state shared by all decode paths (local context,
 * CABAC state, output frame, DPB frames, MD5 context).  On any allocation
 * failure control reaches the elided fail label, which calls
 * hevc_decode_free() and returns ENOMEM. */
3252 static av_cold int hevc_init_context(AVCodecContext *avctx)
3254 HEVCContext *s = avctx->priv_data;
/* Main-thread local context doubles as slot 0 of the per-thread list. */
3259 s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3262 s->HEVClcList[0] = s->HEVClc;
3265 s->cabac_state = av_malloc(HEVC_CONTEXTS);
3266 if (!s->cabac_state)
3269 s->output_frame = av_frame_alloc();
3270 if (!s->output_frame)
/* Pre-allocate an AVFrame per DPB slot; tf.f mirrors it for the
 * frame-threading helpers. */
3273 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3274 s->DPB[i].frame = av_frame_alloc();
3275 if (!s->DPB[i].frame)
3277 s->DPB[i].tf.f = s->DPB[i].frame;
/* No RAP seen yet: disable the recovery-point gate. */
3280 s->max_ra = INT_MAX;
3282 s->md5_ctx = av_md5_alloc();
3286 ff_bswapdsp_init(&s->bdsp);
3288 s->context_initialized = 1;
/* fail: tear down the partially built context. */
3294 hevc_decode_free(avctx);
3295 return AVERROR(ENOMEM);
/* Frame-threading: copy decoder state from the source thread's context (s0)
 * into this thread's context (s) — DPB references, parameter sets, and the
 * scalar sequence/POC bookkeeping.  Called by the frame-thread framework
 * before this thread starts decoding. */
3298 static int hevc_update_thread_context(AVCodecContext *dst,
3299 const AVCodecContext *src)
3301 HEVCContext *s = dst->priv_data;
3302 HEVCContext *s0 = src->priv_data;
/* The copy destination may not have been through init yet. */
3305 if (!s->context_initialized) {
3306 ret = hevc_init_context(dst);
/* Re-reference the source DPB: drop our old refs, then take new ones on
 * every occupied slot. */
3311 for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3312 ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3313 if (s0->DPB[i].frame->buf[0]) {
3314 ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3320 if (s->sps != s0->sps)
/* Parameter sets are AVBuffer-backed: unref ours, ref the source's. */
3322 for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3323 av_buffer_unref(&s->vps_list[i]);
3324 if (s0->vps_list[i]) {
3325 s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3326 if (!s->vps_list[i])
3327 return AVERROR(ENOMEM);
3331 for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3332 av_buffer_unref(&s->sps_list[i]);
3333 if (s0->sps_list[i]) {
3334 s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3335 if (!s->sps_list[i])
3336 return AVERROR(ENOMEM);
3340 for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3341 av_buffer_unref(&s->pps_list[i]);
3342 if (s0->pps_list[i]) {
3343 s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3344 if (!s->pps_list[i])
3345 return AVERROR(ENOMEM);
/* If the active SPS changed, re-derive SPS-dependent state. */
3349 if (s->sps != s0->sps)
3350 if ((ret = set_sps(s, s0->sps, src->pix_fmt)) < 0)
/* Scalar decoding state carried across frames. */
3353 s->seq_decode = s0->seq_decode;
3354 s->seq_output = s0->seq_output;
3355 s->pocTid0 = s0->pocTid0;
3356 s->max_ra = s0->max_ra;
3359 s->is_nalff = s0->is_nalff;
3360 s->nal_length_size = s0->nal_length_size;
3362 s->threads_number = s0->threads_number;
3363 s->threads_type = s0->threads_type;
/* presumably guarded by an eos/flush condition elided from this listing:
 * bump the sequence counter and re-arm the RASL gate — TODO confirm. */
3366 s->seq_decode = (s->seq_decode + 1) & 0xff;
3367 s->max_ra = INT_MAX;
/* Parse codec extradata: either an hvcC configuration record (MP4/Matroska
 * style) or raw Annex-B NAL units, then export stream parameters from the
 * first SPS found. */
3373 static int hevc_decode_extradata(HEVCContext *s)
3375 AVCodecContext *avctx = s->avctx;
3379 bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
/* hvcC detection: anything that cannot be an Annex-B start code. */
3381 if (avctx->extradata_size > 3 &&
3382 (avctx->extradata[0] || avctx->extradata[1] ||
3383 avctx->extradata[2] > 1)) {
3384 /* It seems the extradata is encoded as hvcC format.
3385 * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3386 * is finalized. When finalized, configurationVersion will be 1 and we
3387 * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3388 int i, j, num_arrays, nal_len_size;
/* Skip the fixed 21-byte head of HEVCDecoderConfigurationRecord, then
 * read lengthSizeMinusOne and the number of parameter-set arrays. */
3392 bytestream2_skip(&gb, 21);
3393 nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3394 num_arrays = bytestream2_get_byte(&gb);
3396 /* nal units in the hvcC always have length coded with 2 bytes,
3397 * so put a fake nal_length_size = 2 while parsing them */
3398 s->nal_length_size = 2;
3400 /* Decode nal units from hvcC. */
3401 for (i = 0; i < num_arrays; i++) {
3402 int type = bytestream2_get_byte(&gb) & 0x3f;
3403 int cnt = bytestream2_get_be16(&gb);
3405 for (j = 0; j < cnt; j++) {
3406 // +2 for the nal size field
3407 int nalsize = bytestream2_peek_be16(&gb) + 2;
/* Guard against truncated extradata before handing the bytes on. */
3408 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3409 av_log(s->avctx, AV_LOG_ERROR,
3410 "Invalid NAL unit size in extradata.\n");
3411 return AVERROR_INVALIDDATA;
3414 ret = decode_nal_units(s, gb.buffer, nalsize);
3416 av_log(avctx, AV_LOG_ERROR,
3417 "Decoding nal unit %d %d from hvcC failed\n",
3421 bytestream2_skip(&gb, nalsize);
3425 /* Now store right nal length size, that will be used to parse
3427 s->nal_length_size = nal_len_size;
/* Annex-B branch (elided `else` in this listing): feed the whole
 * extradata through the regular NAL-unit decoder. */
3430 ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3435 /* export stream parameters from the first SPS */
3436 for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3437 if (s->sps_list[i]) {
3438 const HEVCSPS *sps = (const HEVCSPS*)s->sps_list[i]->data;
3439 export_stream_params(s->avctx, s, sps);
/* Codec init: set up CABAC tables and the context, choose the threading
 * model, and pre-decode any extradata (parameter sets). */
3447 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3449 HEVCContext *s = avctx->priv_data;
3452 ff_init_cabac_states();
/* Frame threading needs per-frame progress reporting. */
3454 avctx->internal->allocate_progress = 1;
3456 ret = hevc_init_context(avctx);
3460 s->enable_parallel_tiles = 0;
3461 s->picture_struct = 0;
/* Slice threading uses the user-requested thread count, else run single. */
3463 if(avctx->active_thread_type & FF_THREAD_SLICE)
3464 s->threads_number = avctx->thread_count;
3466 s->threads_number = 1;
3468 if (avctx->extradata_size > 0 && avctx->extradata) {
3469 ret = hevc_decode_extradata(s);
/* Extradata failure tears the context back down (error path elided). */
3471 hevc_decode_free(avctx);
/* Frame threading takes precedence over slice threading when enabled. */
3476 if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3477 s->threads_type = FF_THREAD_FRAME;
3479 s->threads_type = FF_THREAD_SLICE;
/* Frame-thread worker init: start from a zeroed context, then allocate the
 * per-context state; real decoder state arrives later through
 * hevc_update_thread_context(). */
3484 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3486 HEVCContext *s = avctx->priv_data;
3489 memset(s, 0, sizeof(*s));
3491 ret = hevc_init_context(avctx);
/* Seek/flush hook: drop all DPB references and re-arm the RASL output gate. */
3498 static void hevc_decode_flush(AVCodecContext *avctx)
3500 HEVCContext *s = avctx->priv_data;
3501 ff_hevc_flush_dpb(s);
3502 s->max_ra = INT_MAX;
/* AVOption plumbing: field offset into HEVCContext, and the common
 * decoding/video option flags. */
3505 #define OFFSET(x) offsetof(HEVCContext, x)
3506 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
/* Profiles advertised by this decoder; FF_PROFILE_UNKNOWN terminates. */
3508 static const AVProfile profiles[] = {
3509 { FF_PROFILE_HEVC_MAIN, "Main" },
3510 { FF_PROFILE_HEVC_MAIN_10, "Main 10" },
3511 { FF_PROFILE_HEVC_MAIN_STILL_PICTURE, "Main Still Picture" },
3512 { FF_PROFILE_HEVC_REXT, "Rext" },
3513 { FF_PROFILE_UNKNOWN },
/* User-visible decoder options.  Both names map onto the same
 * apply_defdispwin field; the second is kept for backward compatibility.
 * Fix: "stricly" -> "strictly" in the help text (user-facing typo). */
3516 static const AVOption options[] = {
3517 { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
3518 AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
3519 { "strict-displaywin", "strictly apply default display window size", OFFSET(apply_defdispwin),
3520 AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
/* AVClass wiring so the options table above is reachable via av_opt. */
3524 static const AVClass hevc_decoder_class = {
3525 .class_name = "HEVC decoder",
3526 .item_name = av_default_item_name,
3528 .version = LIBAVUTIL_VERSION_INT,
3531 AVCodec ff_hevc_decoder = {
3533 .long_name = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
3534 .type = AVMEDIA_TYPE_VIDEO,
3535 .id = AV_CODEC_ID_HEVC,
3536 .priv_data_size = sizeof(HEVCContext),
3537 .priv_class = &hevc_decoder_class,
3538 .init = hevc_decode_init,
3539 .close = hevc_decode_free,
3540 .decode = hevc_decode_frame,
3541 .flush = hevc_decode_flush,
3542 .update_thread_context = hevc_update_thread_context,
3543 .init_thread_copy = hevc_init_thread_copy,
3544 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
3545 CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
3546 .profiles = NULL_IF_CONFIG_SMALL(profiles),