git.sesse.net Git - ffmpeg/blob - libavcodec/hevc.c

   1 /*
   2  * HEVC video Decoder
   3  *
   4  * Copyright (C) 2012 - 2013 Guillaume Martres
   5  * Copyright (C) 2012 - 2013 Mickael Raulet
   6  * Copyright (C) 2012 - 2013 Gildas Cocherel
   7  * Copyright (C) 2012 - 2013 Wassim Hamidouche
   8  *
   9  * This file is part of FFmpeg.
  10  *
  11  * FFmpeg is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public
  13  * License as published by the Free Software Foundation; either
  14  * version 2.1 of the License, or (at your option) any later version.
  15  *
  16  * FFmpeg is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with FFmpeg; if not, write to the Free Software
  23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  24  */
  25
  26 #include "libavutil/atomic.h"
  27 #include "libavutil/attributes.h"
  28 #include "libavutil/common.h"
  29 #include "libavutil/display.h"
  30 #include "libavutil/internal.h"
  31 #include "libavutil/md5.h"
  32 #include "libavutil/opt.h"
  33 #include "libavutil/pixdesc.h"
  34 #include "libavutil/stereo3d.h"
  35
  36 #include "bswapdsp.h"
  37 #include "bytestream.h"
  38 #include "cabac_functions.h"
  39 #include "golomb.h"
  40 #include "hevc.h"
  41
  42 const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
  43
/**
 * NOTE: Each function hls_foo corresponds to the function foo in the
 * specification (HLS stands for High Level Syntax).
 */
  48
  49 /**
  50  * Section 5.7
  51  */
  52
  53 /* free everything allocated  by pic_arrays_init() */
  54 static void pic_arrays_free(HEVCContext *s)
  55 {
  56     av_freep(&s->sao);
  57     av_freep(&s->deblock);
  58
  59     av_freep(&s->skip_flag);
  60     av_freep(&s->tab_ct_depth);
  61
  62     av_freep(&s->tab_ipm);
  63     av_freep(&s->cbf_luma);
  64     av_freep(&s->is_pcm);
  65
  66     av_freep(&s->qp_y_tab);
  67     av_freep(&s->tab_slice_address);
  68     av_freep(&s->filter_slice_edges);
  69
  70     av_freep(&s->horizontal_bs);
  71     av_freep(&s->vertical_bs);
  72
  73     av_freep(&s->sh.entry_point_offset);
  74     av_freep(&s->sh.size);
  75     av_freep(&s->sh.offset);
  76
  77     av_buffer_pool_uninit(&s->tab_mvf_pool);
  78     av_buffer_pool_uninit(&s->rpl_tab_pool);
  79 }
  80
  81 /* allocate arrays that depend on frame dimensions */
  82 static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
  83 {
  84     int log2_min_cb_size = sps->log2_min_cb_size;
  85     int width            = sps->width;
  86     int height           = sps->height;
  87     int pic_size_in_ctb  = ((width  >> log2_min_cb_size) + 1) *
  88                            ((height >> log2_min_cb_size) + 1);
  89     int ctb_count        = sps->ctb_width * sps->ctb_height;
  90     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
  91
  92     s->bs_width  = (width  >> 2) + 1;
  93     s->bs_height = (height >> 2) + 1;
  94
  95     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
  96     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
  97     if (!s->sao || !s->deblock)
  98         goto fail;
  99
 100     s->skip_flag    = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
 101     s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
 102     if (!s->skip_flag || !s->tab_ct_depth)
 103         goto fail;
 104
 105     s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
 106     s->tab_ipm  = av_mallocz(min_pu_size);
 107     s->is_pcm   = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
 108     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
 109         goto fail;
 110
 111     s->filter_slice_edges = av_mallocz(ctb_count);
 112     s->tab_slice_address  = av_malloc_array(pic_size_in_ctb,
 113                                       sizeof(*s->tab_slice_address));
 114     s->qp_y_tab           = av_malloc_array(pic_size_in_ctb,
 115                                       sizeof(*s->qp_y_tab));
 116     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 117         goto fail;
 118
 119     s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
 120     s->vertical_bs   = av_mallocz_array(s->bs_width, s->bs_height);
 121     if (!s->horizontal_bs || !s->vertical_bs)
 122         goto fail;
 123
 124     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
 125                                           av_buffer_allocz);
 126     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
 127                                           av_buffer_allocz);
 128     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
 129         goto fail;
 130
 131     return 0;
 132
 133 fail:
 134     pic_arrays_free(s);
 135     return AVERROR(ENOMEM);
 136 }
 137
 138 static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
 139 {
 140     int i = 0;
 141     int j = 0;
 142     uint8_t luma_weight_l0_flag[16];
 143     uint8_t chroma_weight_l0_flag[16];
 144     uint8_t luma_weight_l1_flag[16];
 145     uint8_t chroma_weight_l1_flag[16];
 146     int luma_log2_weight_denom;
 147
 148     luma_log2_weight_denom = get_ue_golomb_long(gb);
 149     if (luma_log2_weight_denom < 0 || luma_log2_weight_denom > 7)
 150         av_log(s->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
 151     s->sh.luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
 152     if (s->sps->chroma_format_idc != 0) {
 153         int delta = get_se_golomb(gb);
 154         s->sh.chroma_log2_weight_denom = av_clip_uintp2(s->sh.luma_log2_weight_denom + delta, 3);
 155     }
 156
 157     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 158         luma_weight_l0_flag[i] = get_bits1(gb);
 159         if (!luma_weight_l0_flag[i]) {
 160             s->sh.luma_weight_l0[i] = 1 << s->sh.luma_log2_weight_denom;
 161             s->sh.luma_offset_l0[i] = 0;
 162         }
 163     }
 164     if (s->sps->chroma_format_idc != 0) {
 165         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 166             chroma_weight_l0_flag[i] = get_bits1(gb);
 167     } else {
 168         for (i = 0; i < s->sh.nb_refs[L0]; i++)
 169             chroma_weight_l0_flag[i] = 0;
 170     }
 171     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
 172         if (luma_weight_l0_flag[i]) {
 173             int delta_luma_weight_l0 = get_se_golomb(gb);
 174             s->sh.luma_weight_l0[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l0;
 175             s->sh.luma_offset_l0[i] = get_se_golomb(gb);
 176         }
 177         if (chroma_weight_l0_flag[i]) {
 178             for (j = 0; j < 2; j++) {
 179                 int delta_chroma_weight_l0 = get_se_golomb(gb);
 180                 int delta_chroma_offset_l0 = get_se_golomb(gb);
 181                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
 182                 s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
 183                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 184             }
 185         } else {
 186             s->sh.chroma_weight_l0[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 187             s->sh.chroma_offset_l0[i][0] = 0;
 188             s->sh.chroma_weight_l0[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 189             s->sh.chroma_offset_l0[i][1] = 0;
 190         }
 191     }
 192     if (s->sh.slice_type == B_SLICE) {
 193         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 194             luma_weight_l1_flag[i] = get_bits1(gb);
 195             if (!luma_weight_l1_flag[i]) {
 196                 s->sh.luma_weight_l1[i] = 1 << s->sh.luma_log2_weight_denom;
 197                 s->sh.luma_offset_l1[i] = 0;
 198             }
 199         }
 200         if (s->sps->chroma_format_idc != 0) {
 201             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 202                 chroma_weight_l1_flag[i] = get_bits1(gb);
 203         } else {
 204             for (i = 0; i < s->sh.nb_refs[L1]; i++)
 205                 chroma_weight_l1_flag[i] = 0;
 206         }
 207         for (i = 0; i < s->sh.nb_refs[L1]; i++) {
 208             if (luma_weight_l1_flag[i]) {
 209                 int delta_luma_weight_l1 = get_se_golomb(gb);
 210                 s->sh.luma_weight_l1[i] = (1 << s->sh.luma_log2_weight_denom) + delta_luma_weight_l1;
 211                 s->sh.luma_offset_l1[i] = get_se_golomb(gb);
 212             }
 213             if (chroma_weight_l1_flag[i]) {
 214                 for (j = 0; j < 2; j++) {
 215                     int delta_chroma_weight_l1 = get_se_golomb(gb);
 216                     int delta_chroma_offset_l1 = get_se_golomb(gb);
 217                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
 218                     s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
 219                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
 220                 }
 221             } else {
 222                 s->sh.chroma_weight_l1[i][0] = 1 << s->sh.chroma_log2_weight_denom;
 223                 s->sh.chroma_offset_l1[i][0] = 0;
 224                 s->sh.chroma_weight_l1[i][1] = 1 << s->sh.chroma_log2_weight_denom;
 225                 s->sh.chroma_offset_l1[i][1] = 0;
 226             }
 227         }
 228     }
 229 }
 230
 231 static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
 232 {
 233     const HEVCSPS *sps = s->sps;
 234     int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
 235     int prev_delta_msb = 0;
 236     unsigned int nb_sps = 0, nb_sh;
 237     int i;
 238
 239     rps->nb_refs = 0;
 240     if (!sps->long_term_ref_pics_present_flag)
 241         return 0;
 242
 243     if (sps->num_long_term_ref_pics_sps > 0)
 244         nb_sps = get_ue_golomb_long(gb);
 245     nb_sh = get_ue_golomb_long(gb);
 246
 247     if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
 248         return AVERROR_INVALIDDATA;
 249
 250     rps->nb_refs = nb_sh + nb_sps;
 251
 252     for (i = 0; i < rps->nb_refs; i++) {
 253         uint8_t delta_poc_msb_present;
 254
 255         if (i < nb_sps) {
 256             uint8_t lt_idx_sps = 0;
 257
 258             if (sps->num_long_term_ref_pics_sps > 1)
 259                 lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
 260
 261             rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
 262             rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
 263         } else {
 264             rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
 265             rps->used[i] = get_bits1(gb);
 266         }
 267
 268         delta_poc_msb_present = get_bits1(gb);
 269         if (delta_poc_msb_present) {
 270             int delta = get_ue_golomb_long(gb);
 271
 272             if (i && i != nb_sps)
 273                 delta += prev_delta_msb;
 274
 275             rps->poc[i] += s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
 276             prev_delta_msb = delta;
 277         }
 278     }
 279
 280     return 0;
 281 }
 282
 283 static void export_stream_params(AVCodecContext *avctx,
 284                                  const HEVCContext *s, const HEVCSPS *sps)
 285 {
 286     const HEVCVPS *vps = (const HEVCVPS*)s->vps_list[sps->vps_id]->data;
 287     unsigned int num = 0, den = 0;
 288
 289     avctx->pix_fmt             = sps->pix_fmt;
 290     avctx->coded_width         = sps->width;
 291     avctx->coded_height        = sps->height;
 292     avctx->width               = sps->output_width;
 293     avctx->height              = sps->output_height;
 294     avctx->has_b_frames        = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
 295     avctx->profile             = sps->ptl.general_ptl.profile_idc;
 296     avctx->level               = sps->ptl.general_ptl.level_idc;
 297
 298     ff_set_sar(avctx, sps->vui.sar);
 299
 300     if (sps->vui.video_signal_type_present_flag)
 301         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
 302                                                             : AVCOL_RANGE_MPEG;
 303     else
 304         avctx->color_range = AVCOL_RANGE_MPEG;
 305
 306     if (sps->vui.colour_description_present_flag) {
 307         avctx->color_primaries = sps->vui.colour_primaries;
 308         avctx->color_trc       = sps->vui.transfer_characteristic;
 309         avctx->colorspace      = sps->vui.matrix_coeffs;
 310     } else {
 311         avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
 312         avctx->color_trc       = AVCOL_TRC_UNSPECIFIED;
 313         avctx->colorspace      = AVCOL_SPC_UNSPECIFIED;
 314     }
 315
 316     if (vps->vps_timing_info_present_flag) {
 317         num = vps->vps_num_units_in_tick;
 318         den = vps->vps_time_scale;
 319     } else if (sps->vui.vui_timing_info_present_flag) {
 320         num = sps->vui.vui_num_units_in_tick;
 321         den = sps->vui.vui_time_scale;
 322     }
 323
 324     if (num != 0 && den != 0)
 325         av_reduce(&avctx->framerate.den, &avctx->framerate.num,
 326                   num, den, 1 << 30);
 327 }
 328
 329 static int set_sps(HEVCContext *s, const HEVCSPS *sps, enum AVPixelFormat pix_fmt)
 330 {
 331     #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + CONFIG_HEVC_D3D11VA_HWACCEL + CONFIG_HEVC_VDPAU_HWACCEL)
 332     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
 333     int ret, i;
 334
 335     export_stream_params(s->avctx, s, sps);
 336
 337     pic_arrays_free(s);
 338     ret = pic_arrays_init(s, sps);
 339     if (ret < 0)
 340         goto fail;
 341
 342     if (sps->pix_fmt == AV_PIX_FMT_YUV420P || sps->pix_fmt == AV_PIX_FMT_YUVJ420P) {
 343 #if CONFIG_HEVC_DXVA2_HWACCEL
 344         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
 345 #endif
 346 #if CONFIG_HEVC_D3D11VA_HWACCEL
 347         *fmt++ = AV_PIX_FMT_D3D11VA_VLD;
 348 #endif
 349 #if CONFIG_HEVC_VDPAU_HWACCEL
 350         *fmt++ = AV_PIX_FMT_VDPAU;
 351 #endif
 352     }
 353
 354     if (pix_fmt == AV_PIX_FMT_NONE) {
 355         *fmt++ = sps->pix_fmt;
 356         *fmt = AV_PIX_FMT_NONE;
 357
 358         ret = ff_thread_get_format(s->avctx, pix_fmts);
 359         if (ret < 0)
 360             goto fail;
 361         s->avctx->pix_fmt = ret;
 362     }
 363     else {
 364         s->avctx->pix_fmt = pix_fmt;
 365     }
 366
 367     ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
 368     ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
 369     ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 370
 371     for (i = 0; i < 3; i++) {
 372         av_freep(&s->sao_pixel_buffer_h[i]);
 373         av_freep(&s->sao_pixel_buffer_v[i]);
 374     }
 375
 376     if (sps->sao_enabled && !s->avctx->hwaccel) {
 377         int c_count = (sps->chroma_format_idc != 0) ? 3 : 1;
 378         int c_idx;
 379
 380         for(c_idx = 0; c_idx < c_count; c_idx++) {
 381             int w = sps->width >> sps->hshift[c_idx];
 382             int h = sps->height >> sps->vshift[c_idx];
 383             s->sao_pixel_buffer_h[c_idx] =
 384                 av_malloc((w * 2 * sps->ctb_height) <<
 385                           sps->pixel_shift);
 386             s->sao_pixel_buffer_v[c_idx] =
 387                 av_malloc((h * 2 * sps->ctb_width) <<
 388                           sps->pixel_shift);
 389         }
 390     }
 391
 392     s->sps = sps;
 393     s->vps = (HEVCVPS*) s->vps_list[s->sps->vps_id]->data;
 394
 395     return 0;
 396
 397 fail:
 398     pic_arrays_free(s);
 399     s->sps = NULL;
 400     return ret;
 401 }
 402
 403 static int hls_slice_header(HEVCContext *s)
 404 {
 405     GetBitContext *gb = &s->HEVClc->gb;
 406     SliceHeader *sh   = &s->sh;
 407     int i, ret;
 408
 409     // Coded parameters
 410     sh->first_slice_in_pic_flag = get_bits1(gb);
 411     if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
 412         s->seq_decode = (s->seq_decode + 1) & 0xff;
 413         s->max_ra     = INT_MAX;
 414         if (IS_IDR(s))
 415             ff_hevc_clear_refs(s);
 416     }
 417     sh->no_output_of_prior_pics_flag = 0;
 418     if (IS_IRAP(s))
 419         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 420
 421     sh->pps_id = get_ue_golomb_long(gb);
 422     if (sh->pps_id >= MAX_PPS_COUNT || !s->pps_list[sh->pps_id]) {
 423         av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
 424         return AVERROR_INVALIDDATA;
 425     }
 426     if (!sh->first_slice_in_pic_flag &&
 427         s->pps != (HEVCPPS*)s->pps_list[sh->pps_id]->data) {
 428         av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
 429         return AVERROR_INVALIDDATA;
 430     }
 431     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
 432     if (s->nal_unit_type == NAL_CRA_NUT && s->last_eos == 1)
 433         sh->no_output_of_prior_pics_flag = 1;
 434
 435     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
 436         const HEVCSPS* last_sps = s->sps;
 437         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
 438         if (last_sps && IS_IRAP(s) && s->nal_unit_type != NAL_CRA_NUT) {
 439             if (s->sps->width !=  last_sps->width || s->sps->height != last_sps->height ||
 440                 s->sps->temporal_layer[s->sps->max_sub_layers - 1].max_dec_pic_buffering !=
 441                 last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
 442                 sh->no_output_of_prior_pics_flag = 0;
 443         }
 444         ff_hevc_clear_refs(s);
 445         ret = set_sps(s, s->sps, AV_PIX_FMT_NONE);
 446         if (ret < 0)
 447             return ret;
 448
 449         s->seq_decode = (s->seq_decode + 1) & 0xff;
 450         s->max_ra     = INT_MAX;
 451     }
 452
 453     sh->dependent_slice_segment_flag = 0;
 454     if (!sh->first_slice_in_pic_flag) {
 455         int slice_address_length;
 456
 457         if (s->pps->dependent_slice_segments_enabled_flag)
 458             sh->dependent_slice_segment_flag = get_bits1(gb);
 459
 460         slice_address_length = av_ceil_log2(s->sps->ctb_width *
 461                                             s->sps->ctb_height);
 462         sh->slice_segment_addr = get_bits(gb, slice_address_length);
 463         if (sh->slice_segment_addr >= s->sps->ctb_width * s->sps->ctb_height) {
 464             av_log(s->avctx, AV_LOG_ERROR,
 465                    "Invalid slice segment address: %u.\n",
 466                    sh->slice_segment_addr);
 467             return AVERROR_INVALIDDATA;
 468         }
 469
 470         if (!sh->dependent_slice_segment_flag) {
 471             sh->slice_addr = sh->slice_segment_addr;
 472             s->slice_idx++;
 473         }
 474     } else {
 475         sh->slice_segment_addr = sh->slice_addr = 0;
 476         s->slice_idx           = 0;
 477         s->slice_initialized   = 0;
 478     }
 479
 480     if (!sh->dependent_slice_segment_flag) {
 481         s->slice_initialized = 0;
 482
 483         for (i = 0; i < s->pps->num_extra_slice_header_bits; i++)
 484             skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
 485
 486         sh->slice_type = get_ue_golomb_long(gb);
 487         if (!(sh->slice_type == I_SLICE ||
 488               sh->slice_type == P_SLICE ||
 489               sh->slice_type == B_SLICE)) {
 490             av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
 491                    sh->slice_type);
 492             return AVERROR_INVALIDDATA;
 493         }
 494         if (IS_IRAP(s) && sh->slice_type != I_SLICE) {
 495             av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
 496             return AVERROR_INVALIDDATA;
 497         }
 498
 499         // when flag is not present, picture is inferred to be output
 500         sh->pic_output_flag = 1;
 501         if (s->pps->output_flag_present_flag)
 502             sh->pic_output_flag = get_bits1(gb);
 503
 504         if (s->sps->separate_colour_plane_flag)
 505             sh->colour_plane_id = get_bits(gb, 2);
 506
 507         if (!IS_IDR(s)) {
 508             int poc, pos;
 509
 510             sh->pic_order_cnt_lsb = get_bits(gb, s->sps->log2_max_poc_lsb);
 511             poc = ff_hevc_compute_poc(s, sh->pic_order_cnt_lsb);
 512             if (!sh->first_slice_in_pic_flag && poc != s->poc) {
 513                 av_log(s->avctx, AV_LOG_WARNING,
 514                        "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
 515                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 516                     return AVERROR_INVALIDDATA;
 517                 poc = s->poc;
 518             }
 519             s->poc = poc;
 520
 521             sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
 522             pos = get_bits_left(gb);
 523             if (!sh->short_term_ref_pic_set_sps_flag) {
 524                 ret = ff_hevc_decode_short_term_rps(s, &sh->slice_rps, s->sps, 1);
 525                 if (ret < 0)
 526                     return ret;
 527
 528                 sh->short_term_rps = &sh->slice_rps;
 529             } else {
 530                 int numbits, rps_idx;
 531
 532                 if (!s->sps->nb_st_rps) {
 533                     av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
 534                     return AVERROR_INVALIDDATA;
 535                 }
 536
 537                 numbits = av_ceil_log2(s->sps->nb_st_rps);
 538                 rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
 539                 sh->short_term_rps = &s->sps->st_rps[rps_idx];
 540             }
 541             sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
 542
 543             pos = get_bits_left(gb);
 544             ret = decode_lt_rps(s, &sh->long_term_rps, gb);
 545             if (ret < 0) {
 546                 av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
 547                 if (s->avctx->err_recognition & AV_EF_EXPLODE)
 548                     return AVERROR_INVALIDDATA;
 549             }
 550             sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
 551
 552             if (s->sps->sps_temporal_mvp_enabled_flag)
 553                 sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
 554             else
 555                 sh->slice_temporal_mvp_enabled_flag = 0;
 556         } else {
 557             s->sh.short_term_rps = NULL;
 558             s->poc               = 0;
 559         }
 560
 561         /* 8.3.1 */
 562         if (s->temporal_id == 0 &&
 563             s->nal_unit_type != NAL_TRAIL_N &&
 564             s->nal_unit_type != NAL_TSA_N   &&
 565             s->nal_unit_type != NAL_STSA_N  &&
 566             s->nal_unit_type != NAL_RADL_N  &&
 567             s->nal_unit_type != NAL_RADL_R  &&
 568             s->nal_unit_type != NAL_RASL_N  &&
 569             s->nal_unit_type != NAL_RASL_R)
 570             s->pocTid0 = s->poc;
 571
 572         if (s->sps->sao_enabled) {
 573             sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
 574             if (s->sps->chroma_format_idc) {
 575                 sh->slice_sample_adaptive_offset_flag[1] =
 576                 sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
 577             }
 578         } else {
 579             sh->slice_sample_adaptive_offset_flag[0] = 0;
 580             sh->slice_sample_adaptive_offset_flag[1] = 0;
 581             sh->slice_sample_adaptive_offset_flag[2] = 0;
 582         }
 583
 584         sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
 585         if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) {
 586             int nb_refs;
 587
 588             sh->nb_refs[L0] = s->pps->num_ref_idx_l0_default_active;
 589             if (sh->slice_type == B_SLICE)
 590                 sh->nb_refs[L1] = s->pps->num_ref_idx_l1_default_active;
 591
 592             if (get_bits1(gb)) { // num_ref_idx_active_override_flag
 593                 sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
 594                 if (sh->slice_type == B_SLICE)
 595                     sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
 596             }
 597             if (sh->nb_refs[L0] > MAX_REFS || sh->nb_refs[L1] > MAX_REFS) {
 598                 av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
 599                        sh->nb_refs[L0], sh->nb_refs[L1]);
 600                 return AVERROR_INVALIDDATA;
 601             }
 602
 603             sh->rpl_modification_flag[0] = 0;
 604             sh->rpl_modification_flag[1] = 0;
 605             nb_refs = ff_hevc_frame_nb_refs(s);
 606             if (!nb_refs) {
 607                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
 608                 return AVERROR_INVALIDDATA;
 609             }
 610
 611             if (s->pps->lists_modification_present_flag && nb_refs > 1) {
 612                 sh->rpl_modification_flag[0] = get_bits1(gb);
 613                 if (sh->rpl_modification_flag[0]) {
 614                     for (i = 0; i < sh->nb_refs[L0]; i++)
 615                         sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
 616                 }
 617
 618                 if (sh->slice_type == B_SLICE) {
 619                     sh->rpl_modification_flag[1] = get_bits1(gb);
 620                     if (sh->rpl_modification_flag[1] == 1)
 621                         for (i = 0; i < sh->nb_refs[L1]; i++)
 622                             sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
 623                 }
 624             }
 625
 626             if (sh->slice_type == B_SLICE)
 627                 sh->mvd_l1_zero_flag = get_bits1(gb);
 628
 629             if (s->pps->cabac_init_present_flag)
 630                 sh->cabac_init_flag = get_bits1(gb);
 631             else
 632                 sh->cabac_init_flag = 0;
 633
 634             sh->collocated_ref_idx = 0;
 635             if (sh->slice_temporal_mvp_enabled_flag) {
 636                 sh->collocated_list = L0;
 637                 if (sh->slice_type == B_SLICE)
 638                     sh->collocated_list = !get_bits1(gb);
 639
 640                 if (sh->nb_refs[sh->collocated_list] > 1) {
 641                     sh->collocated_ref_idx = get_ue_golomb_long(gb);
 642                     if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
 643                         av_log(s->avctx, AV_LOG_ERROR,
 644                                "Invalid collocated_ref_idx: %d.\n",
 645                                sh->collocated_ref_idx);
 646                         return AVERROR_INVALIDDATA;
 647                     }
 648                 }
 649             }
 650
 651             if ((s->pps->weighted_pred_flag   && sh->slice_type == P_SLICE) ||
 652                 (s->pps->weighted_bipred_flag && sh->slice_type == B_SLICE)) {
 653                 pred_weight_table(s, gb);
 654             }
 655
 656             sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
 657             if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
 658                 av_log(s->avctx, AV_LOG_ERROR,
 659                        "Invalid number of merging MVP candidates: %d.\n",
 660                        sh->max_num_merge_cand);
 661                 return AVERROR_INVALIDDATA;
 662             }
 663         }
 664
 665         sh->slice_qp_delta = get_se_golomb(gb);
 666
 667         if (s->pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 668             sh->slice_cb_qp_offset = get_se_golomb(gb);
 669             sh->slice_cr_qp_offset = get_se_golomb(gb);
 670         } else {
 671             sh->slice_cb_qp_offset = 0;
 672             sh->slice_cr_qp_offset = 0;
 673         }
 674
 675         if (s->pps->chroma_qp_offset_list_enabled_flag)
 676             sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
 677         else
 678             sh->cu_chroma_qp_offset_enabled_flag = 0;
 679
 680         if (s->pps->deblocking_filter_control_present_flag) {
 681             int deblocking_filter_override_flag = 0;
 682
 683             if (s->pps->deblocking_filter_override_enabled_flag)
 684                 deblocking_filter_override_flag = get_bits1(gb);
 685
 686             if (deblocking_filter_override_flag) {
 687                 sh->disable_deblocking_filter_flag = get_bits1(gb);
 688                 if (!sh->disable_deblocking_filter_flag) {
 689                     sh->beta_offset = get_se_golomb(gb) * 2;
 690                     sh->tc_offset   = get_se_golomb(gb) * 2;
 691                 }
 692             } else {
 693                 sh->disable_deblocking_filter_flag = s->pps->disable_dbf;
 694                 sh->beta_offset                    = s->pps->beta_offset;
 695                 sh->tc_offset                      = s->pps->tc_offset;
 696             }
 697         } else {
 698             sh->disable_deblocking_filter_flag = 0;
 699             sh->beta_offset                    = 0;
 700             sh->tc_offset                      = 0;
 701         }
 702
 703         if (s->pps->seq_loop_filter_across_slices_enabled_flag &&
 704             (sh->slice_sample_adaptive_offset_flag[0] ||
 705              sh->slice_sample_adaptive_offset_flag[1] ||
 706              !sh->disable_deblocking_filter_flag)) {
 707             sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
 708         } else {
 709             sh->slice_loop_filter_across_slices_enabled_flag = s->pps->seq_loop_filter_across_slices_enabled_flag;
 710         }
 711     } else if (!s->slice_initialized) {
 712         av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 713         return AVERROR_INVALIDDATA;
 714     }
 715
 716     sh->num_entry_point_offsets = 0;
 717     if (s->pps->tiles_enabled_flag || s->pps->entropy_coding_sync_enabled_flag) {
 718         unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
 719         // It would be possible to bound this tighter but this here is simpler
 720         if (num_entry_point_offsets > get_bits_left(gb)) {
 721             av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
 722             return AVERROR_INVALIDDATA;
 723         }
 724
 725         sh->num_entry_point_offsets = num_entry_point_offsets;
 726         if (sh->num_entry_point_offsets > 0) {
 727             int offset_len = get_ue_golomb_long(gb) + 1;
 728
 729             if (offset_len < 1 || offset_len > 32) {
 730                 sh->num_entry_point_offsets = 0;
 731                 av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
 732                 return AVERROR_INVALIDDATA;
 733             }
 734
 735             av_freep(&sh->entry_point_offset);
 736             av_freep(&sh->offset);
 737             av_freep(&sh->size);
 738             sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 739             sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 740             sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
 741             if (!sh->entry_point_offset || !sh->offset || !sh->size) {
 742                 sh->num_entry_point_offsets = 0;
 743                 av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
 744                 return AVERROR(ENOMEM);
 745             }
 746             for (i = 0; i < sh->num_entry_point_offsets; i++) {
 747                 unsigned val = get_bits_long(gb, offset_len);
 748                 sh->entry_point_offset[i] = val + 1; // +1; // +1 to get the size
 749             }
 750             if (s->threads_number > 1 && (s->pps->num_tile_rows > 1 || s->pps->num_tile_columns > 1)) {
 751                 s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
 752                 s->threads_number = 1;
 753             } else
 754                 s->enable_parallel_tiles = 0;
 755         } else
 756             s->enable_parallel_tiles = 0;
 757     }
 758
 759     if (s->pps->slice_header_extension_present_flag) {
 760         unsigned int length = get_ue_golomb_long(gb);
 761         if (length*8LL > get_bits_left(gb)) {
 762             av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
 763             return AVERROR_INVALIDDATA;
 764         }
 765         for (i = 0; i < length; i++)
 766             skip_bits(gb, 8);  // slice_header_extension_data_byte
 767     }
 768
 769     // Inferred parameters
 770     sh->slice_qp = 26U + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
 771     if (sh->slice_qp > 51 ||
 772         sh->slice_qp < -s->sps->qp_bd_offset) {
 773         av_log(s->avctx, AV_LOG_ERROR,
 774                "The slice_qp %d is outside the valid range "
 775                "[%d, 51].\n",
 776                sh->slice_qp,
 777                -s->sps->qp_bd_offset);
 778         return AVERROR_INVALIDDATA;
 779     }
 780
 781     sh->slice_ctb_addr_rs = sh->slice_segment_addr;
 782
 783     if (!s->sh.slice_ctb_addr_rs && s->sh.dependent_slice_segment_flag) {
 784         av_log(s->avctx, AV_LOG_ERROR, "Impossible slice segment.\n");
 785         return AVERROR_INVALIDDATA;
 786     }
 787
 788     if (get_bits_left(gb) < 0) {
 789         av_log(s->avctx, AV_LOG_ERROR,
 790                "Overread slice header by %d bits\n", -get_bits_left(gb));
 791         return AVERROR_INVALIDDATA;
 792     }
 793
 794     s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
 795
 796     if (!s->pps->cu_qp_delta_enabled_flag)
 797         s->HEVClc->qp_y = s->sh.slice_qp;
 798
 799     s->slice_initialized = 1;
 800     s->HEVClc->tu.cu_qp_offset_cb = 0;
 801     s->HEVClc->tu.cu_qp_offset_cr = 0;
 802
 803     return 0;
 804 }
 805
     /**
      * Access the entry for the CTB at column x, row y of a per-CTB table
      * that is indexed as y * ctb_width + x. Expects a context pointer
      * named s to be in scope at the point of use.
      */
 806 #define CTB(tab, x, y) ((tab)[(y) * s->sps->ctb_width + (x)])
 807
     /**
      * Assign one SAO field of the current CTB.
      *
      * When neither merge flag is set, value is evaluated and stored (callers
      * pass a bitstream decoding call, so it is only executed in that case).
      * With merge-left the field is copied from the CTB at (rx-1, ry), with
      * merge-up from the CTB at (rx, ry-1). The final else cannot be taken,
      * since the three preceding conditions cover every flag combination.
      *
      * Expects sao, s, rx, ry, sao_merge_left_flag and sao_merge_up_flag to be
      * in scope.
      */
 808 #define SET_SAO(elem, value)                            \
 809 do {                                                    \
 810     if (!sao_merge_up_flag && !sao_merge_left_flag)     \
 811         sao->elem = value;                              \
 812     else if (sao_merge_left_flag)                       \
 813         sao->elem = CTB(s->sao, rx-1, ry).elem;         \
 814     else if (sao_merge_up_flag)                         \
 815         sao->elem = CTB(s->sao, rx, ry-1).elem;         \
 816     else                                                \
 817         sao->elem = 0;                                  \
 818 } while (0)
 819
     /**
      * Fill the SAO parameters of the CTB at (rx, ry) in s->sao.
      *
      * The parameters are either decoded from the bitstream or inherited from
      * the left / upper CTB when the corresponding merge flag is decoded as set.
      * The signed offset_val table is then derived from the absolute offsets.
      *
      * @param s  HEVC decoding context
      * @param rx CTB column
      * @param ry CTB row
      */
 820 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 821 {
 822     HEVCLocalContext *lc    = s->HEVClc;
 823     int sao_merge_left_flag = 0;
 824     int sao_merge_up_flag   = 0;
 825     SAOParams *sao          = &CTB(s->sao, rx, ry);
 826     int c_idx, i;
 827
         /* Merge flags are only read when SAO is enabled for luma or chroma in
          * this slice; merge-up is only read when merge-left was not set.
          * NOTE(review): ctb_left_flag / ctb_up_flag presumably signal that the
          * neighbouring CTB may be referenced -- confirm where they are set. */
 828     if (s->sh.slice_sample_adaptive_offset_flag[0] ||
 829         s->sh.slice_sample_adaptive_offset_flag[1]) {
 830         if (rx > 0) {
 831             if (lc->ctb_left_flag)
 832                 sao_merge_left_flag = ff_hevc_sao_merge_flag_decode(s);
 833         }
 834         if (ry > 0 && !sao_merge_left_flag) {
 835             if (lc->ctb_up_flag)
 836                 sao_merge_up_flag = ff_hevc_sao_merge_flag_decode(s);
 837         }
 838     }
 839
         /* One pass per colour component; only the first one when
          * chroma_format_idc is 0. */
 840     for (c_idx = 0; c_idx < (s->sps->chroma_format_idc ? 3 : 1); c_idx++) {
 841         int log2_sao_offset_scale = c_idx == 0 ? s->pps->log2_sao_offset_scale_luma :
 842                                                  s->pps->log2_sao_offset_scale_chroma;
 843
 844         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
 845             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
 846             continue;
 847         }
 848
             /* The last component reuses the type and edge class of component 1
              * instead of reading its own. */
 849         if (c_idx == 2) {
 850             sao->type_idx[2] = sao->type_idx[1];
 851             sao->eo_class[2] = sao->eo_class[1];
 852         } else {
 853             SET_SAO(type_idx[c_idx], ff_hevc_sao_type_idx_decode(s));
 854         }
 855
 856         if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
 857             continue;
 858
 859         for (i = 0; i < 4; i++)
 860             SET_SAO(offset_abs[c_idx][i], ff_hevc_sao_offset_abs_decode(s));
 861
             /* Band offset: a sign is read for every non-zero offset, followed
              * by the band position. Otherwise an edge class is read, except
              * for component 2, which already copied it above. */
 862         if (sao->type_idx[c_idx] == SAO_BAND) {
 863             for (i = 0; i < 4; i++) {
 864                 if (sao->offset_abs[c_idx][i]) {
 865                     SET_SAO(offset_sign[c_idx][i],
 866                             ff_hevc_sao_offset_sign_decode(s));
 867                 } else {
 868                     sao->offset_sign[c_idx][i] = 0;
 869                 }
 870             }
 871             SET_SAO(band_position[c_idx], ff_hevc_sao_band_position_decode(s));
 872         } else if (c_idx != 2) {
 873             SET_SAO(eo_class[c_idx], ff_hevc_sao_eo_class_decode(s));
 874         }
 875
 876         // Inferred parameters
             /* offset_val[0] is always 0 and entries 1..4 hold the signed
              * offsets. For SAO_EDGE the last two offsets are negated; in the
              * other case the sign comes from offset_sign. Each value is then
              * multiplied by 1 << log2_sao_offset_scale. */
 877         sao->offset_val[c_idx][0] = 0;
 878         for (i = 0; i < 4; i++) {
 879             sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
 880             if (sao->type_idx[c_idx] == SAO_EDGE) {
 881                 if (i > 1)
 882                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 883             } else if (sao->offset_sign[c_idx][i]) {
 884                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
 885             }
 886             sao->offset_val[c_idx][i + 1] *= 1 << log2_sao_offset_scale;
 887         }
 888     }
 889 }
 890
 891 #undef SET_SAO
 892 #undef CTB
 893
     /**
      * Decode the cross-component prediction scale for one chroma plane and
      * store it in the local context (lc->tu.res_scale_val).
      *
      * The scale is 0 when the decoded log2 magnitude (plus one) is 0;
      * otherwise it is a power of two whose sign is given by a decoded flag.
      *
      * @param s   HEVC decoding context
      * @param idx index handed to the two decoding helpers
      * @return always 0
      */
 894 static int hls_cross_component_pred(HEVCContext *s, int idx) {
 895     HEVCLocalContext *lc = s->HEVClc;
 896     int scale            = 0;
 897     int log2_abs_plus1   = ff_hevc_log2_res_scale_abs(s, idx);
 898
 899     if (log2_abs_plus1 != 0) {
 900         int sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
 901         int magnitude = 1 << (log2_abs_plus1 - 1);
 902
 903         scale = magnitude * (1 - 2 * sign_flag);
 904     }
 905
 906     lc->tu.res_scale_val = scale;
 907     return 0;
 908 }
 909
     /**
      * Decode one transform unit: run intra prediction where required and parse
      * the luma and chroma residuals.
      *
      * @param s                  HEVC decoding context
      * @param x0, y0             position of the transform block in luma samples
      * @param xBase, yBase       position used for chroma when chroma is handled
      *                           once for a whole split (only when blk_idx is 3)
      * @param cb_xBase, cb_yBase position handed to ff_hevc_set_qPy()
      * @param log2_cb_size       log2 size handed to ff_hevc_set_qPy()
      * @param log2_trafo_size    log2 of the luma transform block size
      * @param blk_idx            index of this block within its parent split
      * @param cbf_luma           nonzero if a luma residual is coded
      * @param cbf_cb, cbf_cr     coded block flags of the two chroma planes; entry 1
      *                           is only consulted when chroma_format_idc is 2
      * @return 0 on success, AVERROR_INVALIDDATA if the decoded cu_qp_delta is
      *         outside its valid range
      */
 910 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
 911                               int xBase, int yBase, int cb_xBase, int cb_yBase,
 912                               int log2_cb_size, int log2_trafo_size,
 913                               int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
 914 {
 915     HEVCLocalContext *lc = s->HEVClc;
 916     const int log2_trafo_size_c = log2_trafo_size - s->sps->hshift[1];
 917     int i, k;
 918
         /* Luma intra prediction happens regardless of whether a residual
          * is coded. */
 919     if (lc->cu.pred_mode == MODE_INTRA) {
 920         int trafo_size = 1 << log2_trafo_size;
 921         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
 922
 923         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
 924     }
 925
 926     if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
 927         (s->sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
 928         int scan_idx   = SCAN_DIAG;
 929         int scan_idx_c = SCAN_DIAG;
 930         int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
 931                          (s->sps->chroma_format_idc == 2 &&
 932                          (cbf_cb[1] || cbf_cr[1]));
 933
             /* The QP delta is read at most once while is_cu_qp_delta_coded
              * stays set; it is range-checked before the QP is updated. */
 934         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
 935             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
 936             if (lc->tu.cu_qp_delta != 0)
 937                 if (ff_hevc_cu_qp_delta_sign_flag(s) == 1)
 938                     lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
 939             lc->tu.is_cu_qp_delta_coded = 1;
 940
 941             if (lc->tu.cu_qp_delta < -(26 + s->sps->qp_bd_offset / 2) ||
 942                 lc->tu.cu_qp_delta >  (25 + s->sps->qp_bd_offset / 2)) {
 943                 av_log(s->avctx, AV_LOG_ERROR,
 944                        "The cu_qp_delta %d is outside the valid range "
 945                        "[%d, %d].\n",
 946                        lc->tu.cu_qp_delta,
 947                        -(26 + s->sps->qp_bd_offset / 2),
 948                         (25 + s->sps->qp_bd_offset / 2));
 949                 return AVERROR_INVALIDDATA;
 950             }
 951
 952             ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
 953         }
 954
             /* Optional chroma QP offsets, likewise read at most once while
              * is_cu_chroma_qp_offset_coded stays set. */
 955         if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
 956             !lc->cu.cu_transquant_bypass_flag  &&  !lc->tu.is_cu_chroma_qp_offset_coded) {
 957             int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
 958             if (cu_chroma_qp_offset_flag) {
 959                 int cu_chroma_qp_offset_idx  = 0;
 960                 if (s->pps->chroma_qp_offset_list_len_minus1 > 0) {
 961                     cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
 962                     av_log(s->avctx, AV_LOG_ERROR,
 963                         "cu_chroma_qp_offset_idx not yet tested.\n");
 964                 }
 965                 lc->tu.cu_qp_offset_cb = s->pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
 966                 lc->tu.cu_qp_offset_cr = s->pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
 967             } else {
 968                 lc->tu.cu_qp_offset_cb = 0;
 969                 lc->tu.cu_qp_offset_cr = 0;
 970             }
 971             lc->tu.is_cu_chroma_qp_offset_coded = 1;
 972         }
 973
             /* For small intra blocks the coefficient scan order follows the
              * prediction mode: modes 6..14 select the vertical scan, modes
              * 22..30 the horizontal one, everything else stays diagonal. */
 974         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
 975             if (lc->tu.intra_pred_mode >= 6 &&
 976                 lc->tu.intra_pred_mode <= 14) {
 977                 scan_idx = SCAN_VERT;
 978             } else if (lc->tu.intra_pred_mode >= 22 &&
 979                        lc->tu.intra_pred_mode <= 30) {
 980                 scan_idx = SCAN_HORIZ;
 981             }
 982
 983             if (lc->tu.intra_pred_mode_c >=  6 &&
 984                 lc->tu.intra_pred_mode_c <= 14) {
 985                 scan_idx_c = SCAN_VERT;
 986             } else if (lc->tu.intra_pred_mode_c >= 22 &&
 987                        lc->tu.intra_pred_mode_c <= 30) {
 988                 scan_idx_c = SCAN_HORIZ;
 989             }
 990         }
 991
 992         lc->tu.cross_pf = 0;
 993
 994         if (cbf_luma)
 995             ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
             /* Chroma is handled together with this luma block unless the luma
              * block is 4x4 in a subsampled format; that case is handled by the
              * blk_idx == 3 branch further down. */
 996         if (s->sps->chroma_format_idc && (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3)) {
 997             int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
 998             int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
 999             lc->tu.cross_pf  = (s->pps->cross_component_prediction_enabled_flag && cbf_luma &&
1000                                 (lc->cu.pred_mode == MODE_INTER ||
1001                                  (lc->tu.chroma_mode_c ==  4)));
1002
1003             if (lc->tu.cross_pf) {
1004                 hls_cross_component_pred(s, 0);
1005             }
                 /* Two vertically stacked chroma blocks when chroma_format_idc
                  * is 2, a single one otherwise. */
1006             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1007                 if (lc->cu.pred_mode == MODE_INTRA) {
1008                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1009                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
1010                 }
1011                 if (cbf_cb[i])
1012                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1013                                                 log2_trafo_size_c, scan_idx_c, 1);
1014                 else
1015                     if (lc->tu.cross_pf) {
1016                         ptrdiff_t stride = s->frame->linesize[1];
1017                         int hshift = s->sps->hshift[1];
1018                         int vshift = s->sps->vshift[1];
1019                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1020                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1021                         int size = 1 << log2_trafo_size_c;
1022
1023                         uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
1024                                                               ((x0 >> hshift) << s->sps->pixel_shift)];
                             /* No chroma residual is coded: build one by scaling
                              * the values found in edge_emu_buffer. A dedicated
                              * index is used so the enclosing loop over i keeps
                              * its counter and still visits a second chroma
                              * block. */
1025                         for (k = 0; k < (size * size); k++) {
1026                             coeffs[k] = ((lc->tu.res_scale_val * coeffs_y[k]) >> 3);
1027                         }
1028                         s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1029                     }
1030             }
1031
1032             if (lc->tu.cross_pf) {
1033                 hls_cross_component_pred(s, 1);
1034             }
1035             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1036                 if (lc->cu.pred_mode == MODE_INTRA) {
1037                     ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
1038                     s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
1039                 }
1040                 if (cbf_cr[i])
1041                     ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
1042                                                 log2_trafo_size_c, scan_idx_c, 2);
1043                 else
1044                     if (lc->tu.cross_pf) {
1045                         ptrdiff_t stride = s->frame->linesize[2];
1046                         int hshift = s->sps->hshift[2];
1047                         int vshift = s->sps->vshift[2];
1048                         int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
1049                         int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
1050                         int size = 1 << log2_trafo_size_c;
1051
1052                         uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
1053                                                           ((x0 >> hshift) << s->sps->pixel_shift)];
                             /* Same fallback as for the first chroma plane, again
                              * with an index separate from the outer loop. */
1054                         for (k = 0; k < (size * size); k++) {
1055                             coeffs[k] = ((lc->tu.res_scale_val * coeffs_y[k]) >> 3);
1056                         }
1057                         s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride);
1058                     }
1059             }
1060         } else if (s->sps->chroma_format_idc && blk_idx == 3) {
                 /* Chroma was deferred for this split: it is processed once, at
                  * (xBase, yBase), when the block with blk_idx 3 is reached. */
1061             int trafo_size_h = 1 << (log2_trafo_size + 1);
1062             int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
1063             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1064                 if (lc->cu.pred_mode == MODE_INTRA) {
1065                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1066                                                     trafo_size_h, trafo_size_v);
1067                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
1068                 }
1069                 if (cbf_cb[i])
1070                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1071                                                 log2_trafo_size, scan_idx_c, 1);
1072             }
1073             for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
1074                 if (lc->cu.pred_mode == MODE_INTRA) {
1075                     ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
1076                                                 trafo_size_h, trafo_size_v);
1077                     s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
1078                 }
1079                 if (cbf_cr[i])
1080                     ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
1081                                                 log2_trafo_size, scan_idx_c, 2);
1082             }
1083         }
1084     } else if (s->sps->chroma_format_idc && lc->cu.pred_mode == MODE_INTRA) {
             /* Nothing is coded for this unit, but intra-predicted chroma still
              * has to be generated, using the same block layout as above. */
1085         if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
1086             int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
1087             int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
1088             ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
1089             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
1090             s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
1091             if (s->sps->chroma_format_idc == 2) {
1092                 ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
1093                                                 trafo_size_h, trafo_size_v);
1094                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
1095                 s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
1096             }
1097         } else if (blk_idx == 3) {
1098             int trafo_size_h = 1 << (log2_trafo_size + 1);
1099             int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
1100             ff_hevc_set_neighbour_available(s, xBase, yBase,
1101                                             trafo_size_h, trafo_size_v);
1102             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
1103             s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
1104             if (s->sps->chroma_format_idc == 2) {
1105                 ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
1106                                                 trafo_size_h, trafo_size_v);
1107                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
1108                 s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
1109             }
1110         }
1111     }
1112
1113     return 0;
1114 }
1115
     /**
      * Write the value 2 into s->is_pcm for every minimum-size prediction unit
      * covered by the block at (x0, y0), clipped to the picture width and
      * height.
      *
      * @param s            HEVC decoding context
      * @param x0, y0       top-left position of the block
      * @param log2_cb_size log2 of the block size
      */
1116 static void set_deblocking_bypass(HEVCContext *s, int x0, int y0, int log2_cb_size)
1117 {
1118     const int pu_shift  = s->sps->log2_min_pu_size;
1119     const int pu_stride = s->sps->min_pu_width;
1120     const int size      = 1 << log2_cb_size;
1121     const int x_limit   = FFMIN(x0 + size, s->sps->width)  >> pu_shift;
1122     const int y_limit   = FFMIN(y0 + size, s->sps->height) >> pu_shift;
1123     int col, row;
1124
1125     for (row = y0 >> pu_shift; row < y_limit; row++) {
1126         for (col = x0 >> pu_shift; col < x_limit; col++)
1127             s->is_pcm[col + row * pu_stride] = 2;
1128     }
1129 }
1130
     /**
      * Recursively parse a transform tree node.
      *
      * A node is either split into four children of half the size, or it is a
      * leaf that is decoded through hls_transform_unit().
      *
      * @param s                        HEVC decoding context
      * @param x0, y0                   position of this node
      * @param xBase, yBase             position of the parent node, forwarded to
      *                                 hls_transform_unit()
      * @param cb_xBase, cb_yBase       forwarded unchanged to hls_transform_unit()
      * @param log2_cb_size             forwarded unchanged to hls_transform_unit()
      * @param log2_trafo_size          log2 of the size of this node
      * @param trafo_depth              depth of this node, 0 for the root
      * @param blk_idx                  index of this node within its parent (0..3)
      * @param base_cbf_cb, base_cbf_cr chroma coded block flags of the parent;
      *                                 they are copied, not modified
      * @return 0 on success, a negative error code propagated from a child node
      *         or from hls_transform_unit() otherwise
      */
1131 static int hls_transform_tree(HEVCContext *s, int x0, int y0,
1132                               int xBase, int yBase, int cb_xBase, int cb_yBase,
1133                               int log2_cb_size, int log2_trafo_size,
1134                               int trafo_depth, int blk_idx,
1135                               const int *base_cbf_cb, const int *base_cbf_cr)
1136 {
1137     HEVCLocalContext *lc = s->HEVClc;
1138     uint8_t split_transform_flag;
1139     int cbf_cb[2];
1140     int cbf_cr[2];
1141     int ret;
1142
         /* Start from the parent's flags; the local copies may be overwritten
          * below and are the ones handed on to children. */
1143     cbf_cb[0] = base_cbf_cb[0];
1144     cbf_cb[1] = base_cbf_cb[1];
1145     cbf_cr[0] = base_cbf_cr[0];
1146     cbf_cr[1] = base_cbf_cr[1];
1147
         /* Pick the intra prediction modes used by the transform units. With an
          * intra split they are selected at depth 1 by blk_idx (chroma per
          * block only when chroma_format_idc is 3); otherwise entry 0 is used. */
1148     if (lc->cu.intra_split_flag) {
1149         if (trafo_depth == 1) {
1150             lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[blk_idx];
1151             if (s->sps->chroma_format_idc == 3) {
1152                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
1153                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[blk_idx];
1154             } else {
1155                 lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1156                 lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1157             }
1158         }
1159     } else {
1160         lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[0];
1161         lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
1162         lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
1163     }
1164
         /* The split flag is read from the bitstream only when both outcomes
          * are allowed; otherwise it is inferred: forced when the node is
          * larger than the maximum transform size, at the root of an
          * intra-split CU, or for the inter_split case computed below. */
1165     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
1166         log2_trafo_size >  s->sps->log2_min_tb_size    &&
1167         trafo_depth     < lc->cu.max_trafo_depth       &&
1168         !(lc->cu.intra_split_flag && trafo_depth == 0)) {
1169         split_transform_flag = ff_hevc_split_transform_flag_decode(s, log2_trafo_size);
1170     } else {
1171         int inter_split = s->sps->max_transform_hierarchy_depth_inter == 0 &&
1172                           lc->cu.pred_mode == MODE_INTER &&
1173                           lc->cu.part_mode != PART_2Nx2N &&
1174                           trafo_depth == 0;
1175
1176         split_transform_flag = log2_trafo_size > s->sps->log2_max_trafo_size ||
1177                                (lc->cu.intra_split_flag && trafo_depth == 0) ||
1178                                inter_split;
1179     }
1180
         /* Chroma coded block flags are read at the root, or when the value
          * inherited from the parent is set. The second flag of each plane is
          * only read when chroma_format_idc is 2 and the node is a leaf or has
          * log2 size 3. */
1181     if (s->sps->chroma_format_idc && (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3)) {
1182         if (trafo_depth == 0 || cbf_cb[0]) {
1183             cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1184             if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1185                 cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1186             }
1187         }
1188
1189         if (trafo_depth == 0 || cbf_cr[0]) {
1190             cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1191             if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
1192                 cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
1193             }
1194         }
1195     }
1196
1197     if (split_transform_flag) {
1198         const int trafo_size_split = 1 << (log2_trafo_size - 1);
1199         const int x1 = x0 + trafo_size_split;
1200         const int y1 = y0 + trafo_size_split;
1201
         /* Recurse into one quadrant; this node's position becomes the child's
          * xBase/yBase. Any error aborts the whole tree. */
1202 #define SUBDIVIDE(x, y, idx)                                                    \
1203 do {                                                                            \
1204     ret = hls_transform_tree(s, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
1205                              log2_trafo_size - 1, trafo_depth + 1, idx,         \
1206                              cbf_cb, cbf_cr);                                   \
1207     if (ret < 0)                                                                \
1208         return ret;                                                             \
1209 } while (0)
1210
1211         SUBDIVIDE(x0, y0, 0);
1212         SUBDIVIDE(x1, y0, 1);
1213         SUBDIVIDE(x0, y1, 2);
1214         SUBDIVIDE(x1, y1, 3);
1215
1216 #undef SUBDIVIDE
1217     } else {
1218         int min_tu_size      = 1 << s->sps->log2_min_tb_size;
1219         int log2_min_tu_size = s->sps->log2_min_tb_size;
1220         int min_tu_width     = s->sps->min_tb_width;
1221         int cbf_luma         = 1;
1222
             /* cbf_luma keeps its default of 1 only for a non-intra CU at depth
              * 0 without any chroma flag set; in every other case it is read. */
1223         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
1224             cbf_cb[0] || cbf_cr[0] ||
1225             (s->sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
1226             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
1227         }
1228
1229         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
1230                                  log2_cb_size, log2_trafo_size,
1231                                  blk_idx, cbf_luma, cbf_cb, cbf_cr);
1232         if (ret < 0)
1233             return ret;
1234         // TODO: store cbf_luma somewhere else
             /* Record the luma flag for every minimum-size transform block
              * covered by this leaf. */
1235         if (cbf_luma) {
1236             int i, j;
1237             for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
1238                 for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
1239                     int x_tu = (x0 + j) >> log2_min_tu_size;
1240                     int y_tu = (y0 + i) >> log2_min_tu_size;
1241                     s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
1242                 }
1243         }
             /* Deblocking bookkeeping is skipped entirely when the slice
              * disables the deblocking filter. */
1244         if (!s->sh.disable_deblocking_filter_flag) {
1245             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
1246             if (s->pps->transquant_bypass_enable_flag &&
1247                 lc->cu.cu_transquant_bypass_flag)
1248                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
1249         }
1250     }
1251     return 0;
1252 }
1253
     /**
      * Copy the raw PCM samples of one coding block into the current frame.
      *
      * The sample bytes are skipped in the CABAC byte stream and then read
      * through a separate bit reader. Luma is always written; the two chroma
      * planes only when chroma_format_idc is non-zero.
      *
      * @param s            HEVC decoding context
      * @param x0, y0       top-left position of the block in luma samples
      * @param log2_cb_size log2 of the block size
      * @return 0 on success, the negative error code of init_get_bits() otherwise
      */
1254 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
1255 {
1256     HEVCLocalContext *lc = s->HEVClc;
1257     GetBitContext gb;
1258     const int size     = 1 << log2_cb_size;
1259     const int shift    = s->sps->pixel_shift;
1260     const int w_c1     = size >> s->sps->hshift[1];
1261     const int h_c1     = size >> s->sps->vshift[1];
1262     const int w_c2     = size >> s->sps->hshift[2];
1263     const int h_c2     = size >> s->sps->vshift[2];
1264     int stride_y       = s->frame->linesize[0];
1265     int stride_cb      = s->frame->linesize[1];
1266     int stride_cr      = s->frame->linesize[2];
1267     uint8_t *dst_y     = s->frame->data[0] + (y0 * stride_y + (x0 << shift));
1268     uint8_t *dst_cb    = s->frame->data[1] + ((y0 >> s->sps->vshift[1]) * stride_cb +
1269                                               ((x0 >> s->sps->hshift[1]) << shift));
1270     uint8_t *dst_cr    = s->frame->data[2] + ((y0 >> s->sps->vshift[2]) * stride_cr +
1271                                               ((x0 >> s->sps->hshift[2]) << shift));
1272     int nb_bits        = size * size * s->sps->pcm.bit_depth +
1273                          (w_c1 * h_c1 + w_c2 * h_c2) * s->sps->pcm.bit_depth_chroma;
1274     const uint8_t *raw = skip_bytes(&lc->cc, (nb_bits + 7) >> 3);
1275     int err;
1276
1277     if (!s->sh.disable_deblocking_filter_flag)
1278         ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
1279
1280     err = init_get_bits(&gb, raw, nb_bits);
1281     if (err < 0)
1282         return err;
1283
1284     s->hevcdsp.put_pcm(dst_y, stride_y, size, size, &gb, s->sps->pcm.bit_depth);
1285     if (!s->sps->chroma_format_idc)
1286         return 0;
1287
1288     s->hevcdsp.put_pcm(dst_cb, stride_cb, w_c1, h_c1,
1289                        &gb, s->sps->pcm.bit_depth_chroma);
1290     s->hevcdsp.put_pcm(dst_cr, stride_cr, w_c2, h_c2,
1291                        &gb, s->sps->pcm.bit_depth_chroma);
1292     return 0;
1293 }
1294
1295 /**
1296  * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
1297  *
      * The block is fetched from the reference picture at the position displaced
      * by the integer part of the motion vector (mv >> 2); the two low bits of
      * each component select the fractional interpolation position. When the
      * block lies too close to the picture border, the source area is first
      * copied into lc->edge_emu_buffer through emulated_edge_mc(). The result is
      * written by the weighted or unweighted qpel function, depending on the
      * slice type and the PPS weighted prediction flags.
      *
1298  * @param s HEVC decoding context
1299  * @param dst target buffer for block data at block position
1300  * @param dststride stride of the dst buffer
1301  * @param ref reference picture buffer at origin (0, 0)
1302  * @param mv motion vector (relative to block position) to get pixel data from
1303  * @param x_off horizontal position of block from origin (0, 0)
1304  * @param y_off vertical position of block from origin (0, 0)
1305  * @param block_w width of block
1306  * @param block_h height of block
1307  * @param luma_weight weighting factor applied to the luma prediction
1308  * @param luma_offset additive offset applied to the luma prediction value
1309  */
1310
1311 static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1312                         AVFrame *ref, const Mv *mv, int x_off, int y_off,
1313                         int block_w, int block_h, int luma_weight, int luma_offset)
1314 {
1315     HEVCLocalContext *lc = s->HEVClc;
1316     uint8_t *src         = ref->data[0];
1317     ptrdiff_t srcstride  = ref->linesize[0];
1318     int pic_width        = s->sps->width;
1319     int pic_height       = s->sps->height;
         /* Fractional part of the motion vector. */
1320     int mx               = mv->x & 3;
1321     int my               = mv->y & 3;
1322     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1323                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
         /* The block width selects the entry of the DSP function tables. */
1324     int idx              = ff_hevc_pel_weight[block_w];
1325
         /* Move to the reference position given by the integer part of the
          * motion vector. */
1326     x_off += mv->x >> 2;
1327     y_off += mv->y >> 2;
1328     src   += y_off * srcstride + x_off * (1 << s->sps->pixel_shift);
1329
         /* Too close to a picture border: build the source area, including the
          * QPEL_EXTRA margin, in the edge emulation buffer and read from there.
          * NOTE(review): the upper bound test on y_off uses QPEL_EXTRA_AFTER
          * while the emulation call below uses QPEL_EXTRA_BEFORE -- presumably
          * just a conservative check; confirm against the macro values. */
1330     if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
1331         x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1332         y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1333         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1334         int offset     = QPEL_EXTRA_BEFORE * srcstride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1335         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1336
1337         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
1338                                  edge_emu_stride, srcstride,
1339                                  block_w + QPEL_EXTRA,
1340                                  block_h + QPEL_EXTRA,
1341                                  x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
1342                                  pic_width, pic_height);
1343         src = lc->edge_emu_buffer + buf_offset;
1344         srcstride = edge_emu_stride;
1345     }
1346
         /* !!my / !!mx pick the function variant for a non-zero vertical /
          * horizontal fractional position. */
1347     if (!weight_flag)
1348         s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
1349                                                       block_h, mx, my, block_w);
1350     else
1351         s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
1352                                                         block_h, s->sh.luma_log2_weight_denom,
1353                                                         luma_weight, luma_offset, mx, my, block_w);
1354 }
1355
1356 /**
1357  * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
1358  *
1359  * @param s HEVC decoding context
1360  * @param dst target buffer for block data at block position
1361  * @param dststride stride of the dst buffer
1362  * @param ref0 reference picture0 buffer at origin (0, 0)
1363  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1364  * @param x_off horizontal position of block from origin (0, 0)
1365  * @param y_off vertical position of block from origin (0, 0)
1366  * @param block_w width of block
1367  * @param block_h height of block
1368  * @param ref1 reference picture1 buffer at origin (0, 0)
1369  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1370  * @param current_mv current motion vector structure
1371  */
1372  static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
1373                        AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
1374                        int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
1375 {
1376     HEVCLocalContext *lc = s->HEVClc;
1377     ptrdiff_t src0stride  = ref0->linesize[0];
1378     ptrdiff_t src1stride  = ref1->linesize[0];
1379     int pic_width        = s->sps->width;
1380     int pic_height       = s->sps->height;
1381     int mx0              = mv0->x & 3;
1382     int my0              = mv0->y & 3;
1383     int mx1              = mv1->x & 3;
1384     int my1              = mv1->y & 3;
1385     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1386                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1387     int x_off0           = x_off + (mv0->x >> 2);
1388     int y_off0           = y_off + (mv0->y >> 2);
1389     int x_off1           = x_off + (mv1->x >> 2);
1390     int y_off1           = y_off + (mv1->y >> 2);
1391     int idx              = ff_hevc_pel_weight[block_w];
1392
1393     uint8_t *src0  = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
1394     uint8_t *src1  = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
1395
1396     if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
1397         x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1398         y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1399         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1400         int offset     = QPEL_EXTRA_BEFORE * src0stride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1401         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1402
1403         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
1404                                  edge_emu_stride, src0stride,
1405                                  block_w + QPEL_EXTRA,
1406                                  block_h + QPEL_EXTRA,
1407                                  x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
1408                                  pic_width, pic_height);
1409         src0 = lc->edge_emu_buffer + buf_offset;
1410         src0stride = edge_emu_stride;
1411     }
1412
1413     if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
1414         x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
1415         y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
1416         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1417         int offset     = QPEL_EXTRA_BEFORE * src1stride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1418         int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
1419
1420         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
1421                                  edge_emu_stride, src1stride,
1422                                  block_w + QPEL_EXTRA,
1423                                  block_h + QPEL_EXTRA,
1424                                  x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
1425                                  pic_width, pic_height);
1426         src1 = lc->edge_emu_buffer2 + buf_offset;
1427         src1stride = edge_emu_stride;
1428     }
1429
1430     s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](lc->tmp, src0, src0stride,
1431                                                 block_h, mx0, my0, block_w);
1432     if (!weight_flag)
1433         s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1434                                                        block_h, mx1, my1, block_w);
1435     else
1436         s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, lc->tmp,
1437                                                          block_h, s->sh.luma_log2_weight_denom,
1438                                                          s->sh.luma_weight_l0[current_mv->ref_idx[0]],
1439                                                          s->sh.luma_weight_l1[current_mv->ref_idx[1]],
1440                                                          s->sh.luma_offset_l0[current_mv->ref_idx[0]],
1441                                                          s->sh.luma_offset_l1[current_mv->ref_idx[1]],
1442                                                          mx1, my1, block_w);
1443
1444 }
1445
1446 /**
1447  * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
1448  *
1449  * @param s HEVC decoding context
1450  * @param dst1 target buffer for block data at block position (U plane)
1451  * @param dst2 target buffer for block data at block position (V plane)
1452  * @param dststride stride of the dst1 and dst2 buffers
1453  * @param ref reference picture buffer at origin (0, 0)
1454  * @param mv motion vector (relative to block position) to get pixel data from
1455  * @param x_off horizontal position of block from origin (0, 0)
1456  * @param y_off vertical position of block from origin (0, 0)
1457  * @param block_w width of block
1458  * @param block_h height of block
1459  * @param chroma_weight weighting factor applied to the chroma prediction
1460  * @param chroma_offset additive offset applied to the chroma prediction value
1461  */
1462
1463 static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
1464                           ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
1465                           int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
1466 {
1467     HEVCLocalContext *lc = s->HEVClc;
1468     int pic_width        = s->sps->width >> s->sps->hshift[1];
1469     int pic_height       = s->sps->height >> s->sps->vshift[1];
1470     const Mv *mv         = &current_mv->mv[reflist];
1471     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1472                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1473     int idx              = ff_hevc_pel_weight[block_w];
1474     int hshift           = s->sps->hshift[1];
1475     int vshift           = s->sps->vshift[1];
1476     intptr_t mx          = av_mod_uintp2(mv->x, 2 + hshift);
1477     intptr_t my          = av_mod_uintp2(mv->y, 2 + vshift);
1478     intptr_t _mx         = mx << (1 - hshift);
1479     intptr_t _my         = my << (1 - vshift);
1480
1481     x_off += mv->x >> (2 + hshift);
1482     y_off += mv->y >> (2 + vshift);
1483     src0  += y_off * srcstride + x_off * (1 << s->sps->pixel_shift);
1484
1485     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
1486         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1487         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1488         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1489         int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->sps->pixel_shift));
1490         int buf_offset0 = EPEL_EXTRA_BEFORE *
1491                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1492         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
1493                                  edge_emu_stride, srcstride,
1494                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1495                                  x_off - EPEL_EXTRA_BEFORE,
1496                                  y_off - EPEL_EXTRA_BEFORE,
1497                                  pic_width, pic_height);
1498
1499         src0 = lc->edge_emu_buffer + buf_offset0;
1500         srcstride = edge_emu_stride;
1501     }
1502     if (!weight_flag)
1503         s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1504                                                   block_h, _mx, _my, block_w);
1505     else
1506         s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
1507                                                         block_h, s->sh.chroma_log2_weight_denom,
1508                                                         chroma_weight, chroma_offset, _mx, _my, block_w);
1509 }
1510
1511 /**
1512  * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
1513  *
1514  * @param s HEVC decoding context
1515  * @param dst target buffer for block data at block position
1516  * @param dststride stride of the dst buffer
1517  * @param ref0 reference picture0 buffer at origin (0, 0)
1518  * @param mv0 motion vector0 (relative to block position) to get pixel data from
1519  * @param x_off horizontal position of block from origin (0, 0)
1520  * @param y_off vertical position of block from origin (0, 0)
1521  * @param block_w width of block
1522  * @param block_h height of block
1523  * @param ref1 reference picture1 buffer at origin (0, 0)
1524  * @param mv1 motion vector1 (relative to block position) to get pixel data from
1525  * @param current_mv current motion vector structure
1526  * @param cidx chroma component(cb, cr)
1527  */
1528 static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
1529                          int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
1530 {
1531     HEVCLocalContext *lc = s->HEVClc;
1532     uint8_t *src1        = ref0->data[cidx+1];
1533     uint8_t *src2        = ref1->data[cidx+1];
1534     ptrdiff_t src1stride = ref0->linesize[cidx+1];
1535     ptrdiff_t src2stride = ref1->linesize[cidx+1];
1536     int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
1537                            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
1538     int pic_width        = s->sps->width >> s->sps->hshift[1];
1539     int pic_height       = s->sps->height >> s->sps->vshift[1];
1540     Mv *mv0              = &current_mv->mv[0];
1541     Mv *mv1              = &current_mv->mv[1];
1542     int hshift = s->sps->hshift[1];
1543     int vshift = s->sps->vshift[1];
1544
1545     intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift);
1546     intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift);
1547     intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift);
1548     intptr_t my1 = av_mod_uintp2(mv1->y, 2 + vshift);
1549     intptr_t _mx0 = mx0 << (1 - hshift);
1550     intptr_t _my0 = my0 << (1 - vshift);
1551     intptr_t _mx1 = mx1 << (1 - hshift);
1552     intptr_t _my1 = my1 << (1 - vshift);
1553
1554     int x_off0 = x_off + (mv0->x >> (2 + hshift));
1555     int y_off0 = y_off + (mv0->y >> (2 + vshift));
1556     int x_off1 = x_off + (mv1->x >> (2 + hshift));
1557     int y_off1 = y_off + (mv1->y >> (2 + vshift));
1558     int idx = ff_hevc_pel_weight[block_w];
1559     src1  += y_off0 * src1stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
1560     src2  += y_off1 * src2stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
1561
1562     if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
1563         x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1564         y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1565         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1566         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
1567         int buf_offset1 = EPEL_EXTRA_BEFORE *
1568                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1569
1570         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
1571                                  edge_emu_stride, src1stride,
1572                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1573                                  x_off0 - EPEL_EXTRA_BEFORE,
1574                                  y_off0 - EPEL_EXTRA_BEFORE,
1575                                  pic_width, pic_height);
1576
1577         src1 = lc->edge_emu_buffer + buf_offset1;
1578         src1stride = edge_emu_stride;
1579     }
1580
1581     if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
1582         x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
1583         y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
1584         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
1585         int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
1586         int buf_offset1 = EPEL_EXTRA_BEFORE *
1587                           (edge_emu_stride + (1 << s->sps->pixel_shift));
1588
1589         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
1590                                  edge_emu_stride, src2stride,
1591                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
1592                                  x_off1 - EPEL_EXTRA_BEFORE,
1593                                  y_off1 - EPEL_EXTRA_BEFORE,
1594                                  pic_width, pic_height);
1595
1596         src2 = lc->edge_emu_buffer2 + buf_offset1;
1597         src2stride = edge_emu_stride;
1598     }
1599
1600     s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](lc->tmp, src1, src1stride,
1601                                                 block_h, _mx0, _my0, block_w);
1602     if (!weight_flag)
1603         s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1604                                                        src2, src2stride, lc->tmp,
1605                                                        block_h, _mx1, _my1, block_w);
1606     else
1607         s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
1608                                                          src2, src2stride, lc->tmp,
1609                                                          block_h,
1610                                                          s->sh.chroma_log2_weight_denom,
1611                                                          s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
1612                                                          s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
1613                                                          s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
1614                                                          s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
1615                                                          _mx1, _my1, block_w);
1616 }
1617
1618 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
1619                                 const Mv *mv, int y0, int height)
1620 {
1621     int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
1622
1623     if (s->threads_type == FF_THREAD_FRAME )
1624         ff_thread_await_progress(&ref->tf, y, 0);
1625 }
1626
1627 static void hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
1628                                   int nPbH, int log2_cb_size, int part_idx,
1629                                   int merge_idx, MvField *mv)
1630 {
1631     HEVCLocalContext *lc = s->HEVClc;
1632     enum InterPredIdc inter_pred_idc = PRED_L0;
1633     int mvp_flag;
1634
1635     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
1636     mv->pred_flag = 0;
1637     if (s->sh.slice_type == B_SLICE)
1638         inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
1639
1640     if (inter_pred_idc != PRED_L1) {
1641         if (s->sh.nb_refs[L0])
1642             mv->ref_idx[0]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
1643
1644         mv->pred_flag = PF_L0;
1645         ff_hevc_hls_mvd_coding(s, x0, y0, 0);
1646         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1647         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1648                                  part_idx, merge_idx, mv, mvp_flag, 0);
1649         mv->mv[0].x += lc->pu.mvd.x;
1650         mv->mv[0].y += lc->pu.mvd.y;
1651     }
1652
1653     if (inter_pred_idc != PRED_L0) {
1654         if (s->sh.nb_refs[L1])
1655             mv->ref_idx[1]= ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L1]);
1656
1657         if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
1658             AV_ZERO32(&lc->pu.mvd);
1659         } else {
1660             ff_hevc_hls_mvd_coding(s, x0, y0, 1);
1661         }
1662
1663         mv->pred_flag += PF_L1;
1664         mvp_flag = ff_hevc_mvp_lx_flag_decode(s);
1665         ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1666                                  part_idx, merge_idx, mv, mvp_flag, 1);
1667         mv->mv[1].x += lc->pu.mvd.x;
1668         mv->mv[1].y += lc->pu.mvd.y;
1669     }
1670 }
1671
1672 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
1673                                 int nPbW, int nPbH,
1674                                 int log2_cb_size, int partIdx, int idx)
1675 {
1676 #define POS(c_idx, x, y)                                                              \
1677     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
1678                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
1679     HEVCLocalContext *lc = s->HEVClc;
1680     int merge_idx = 0;
1681     struct MvField current_mv = {{{ 0 }}};
1682
1683     int min_pu_width = s->sps->min_pu_width;
1684
1685     MvField *tab_mvf = s->ref->tab_mvf;
1686     RefPicList  *refPicList = s->ref->refPicList;
1687     HEVCFrame *ref0 = NULL, *ref1 = NULL;
1688     uint8_t *dst0 = POS(0, x0, y0);
1689     uint8_t *dst1 = POS(1, x0, y0);
1690     uint8_t *dst2 = POS(2, x0, y0);
1691     int log2_min_cb_size = s->sps->log2_min_cb_size;
1692     int min_cb_width     = s->sps->min_cb_width;
1693     int x_cb             = x0 >> log2_min_cb_size;
1694     int y_cb             = y0 >> log2_min_cb_size;
1695     int x_pu, y_pu;
1696     int i, j;
1697
1698     int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
1699
1700     if (!skip_flag)
1701         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
1702
1703     if (skip_flag || lc->pu.merge_flag) {
1704         if (s->sh.max_num_merge_cand > 1)
1705             merge_idx = ff_hevc_merge_idx_decode(s);
1706         else
1707             merge_idx = 0;
1708
1709         ff_hevc_luma_mv_merge_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1710                                    partIdx, merge_idx, &current_mv);
1711     } else {
1712         hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
1713                               partIdx, merge_idx, &current_mv);
1714     }
1715
1716     x_pu = x0 >> s->sps->log2_min_pu_size;
1717     y_pu = y0 >> s->sps->log2_min_pu_size;
1718
1719     for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
1720         for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
1721             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
1722
1723     if (current_mv.pred_flag & PF_L0) {
1724         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
1725         if (!ref0)
1726             return;
1727         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
1728     }
1729     if (current_mv.pred_flag & PF_L1) {
1730         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
1731         if (!ref1)
1732             return;
1733         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
1734     }
1735
1736     if (current_mv.pred_flag == PF_L0) {
1737         int x0_c = x0 >> s->sps->hshift[1];
1738         int y0_c = y0 >> s->sps->vshift[1];
1739         int nPbW_c = nPbW >> s->sps->hshift[1];
1740         int nPbH_c = nPbH >> s->sps->vshift[1];
1741
1742         luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
1743                     &current_mv.mv[0], x0, y0, nPbW, nPbH,
1744                     s->sh.luma_weight_l0[current_mv.ref_idx[0]],
1745                     s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
1746
1747         if (s->sps->chroma_format_idc) {
1748             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
1749                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1750                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
1751             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
1752                           0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1753                           s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
1754         }
1755     } else if (current_mv.pred_flag == PF_L1) {
1756         int x0_c = x0 >> s->sps->hshift[1];
1757         int y0_c = y0 >> s->sps->vshift[1];
1758         int nPbW_c = nPbW >> s->sps->hshift[1];
1759         int nPbH_c = nPbH >> s->sps->vshift[1];
1760
1761         luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
1762                     &current_mv.mv[1], x0, y0, nPbW, nPbH,
1763                     s->sh.luma_weight_l1[current_mv.ref_idx[1]],
1764                     s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
1765
1766         if (s->sps->chroma_format_idc) {
1767             chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
1768                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1769                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
1770
1771             chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
1772                           1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
1773                           s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
1774         }
1775     } else if (current_mv.pred_flag == PF_BI) {
1776         int x0_c = x0 >> s->sps->hshift[1];
1777         int y0_c = y0 >> s->sps->vshift[1];
1778         int nPbW_c = nPbW >> s->sps->hshift[1];
1779         int nPbH_c = nPbH >> s->sps->vshift[1];
1780
1781         luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
1782                    &current_mv.mv[0], x0, y0, nPbW, nPbH,
1783                    ref1->frame, &current_mv.mv[1], &current_mv);
1784
1785         if (s->sps->chroma_format_idc) {
1786             chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
1787                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0);
1788
1789             chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
1790                          x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 1);
1791         }
1792     }
1793 }
1794
1795 /**
1796  * 8.4.1
1797  */
1798 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
1799                                 int prev_intra_luma_pred_flag)
1800 {
1801     HEVCLocalContext *lc = s->HEVClc;
1802     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1803     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1804     int min_pu_width     = s->sps->min_pu_width;
1805     int size_in_pus      = pu_size >> s->sps->log2_min_pu_size;
1806     int x0b              = av_mod_uintp2(x0, s->sps->log2_ctb_size);
1807     int y0b              = av_mod_uintp2(y0, s->sps->log2_ctb_size);
1808
1809     int cand_up   = (lc->ctb_up_flag || y0b) ?
1810                     s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
1811     int cand_left = (lc->ctb_left_flag || x0b) ?
1812                     s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
1813
1814     int y_ctb = (y0 >> (s->sps->log2_ctb_size)) << (s->sps->log2_ctb_size);
1815
1816     MvField *tab_mvf = s->ref->tab_mvf;
1817     int intra_pred_mode;
1818     int candidate[3];
1819     int i, j;
1820
1821     // intra_pred_mode prediction does not cross vertical CTB boundaries
1822     if ((y0 - 1) < y_ctb)
1823         cand_up = INTRA_DC;
1824
1825     if (cand_left == cand_up) {
1826         if (cand_left < 2) {
1827             candidate[0] = INTRA_PLANAR;
1828             candidate[1] = INTRA_DC;
1829             candidate[2] = INTRA_ANGULAR_26;
1830         } else {
1831             candidate[0] = cand_left;
1832             candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
1833             candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
1834         }
1835     } else {
1836         candidate[0] = cand_left;
1837         candidate[1] = cand_up;
1838         if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
1839             candidate[2] = INTRA_PLANAR;
1840         } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
1841             candidate[2] = INTRA_DC;
1842         } else {
1843             candidate[2] = INTRA_ANGULAR_26;
1844         }
1845     }
1846
1847     if (prev_intra_luma_pred_flag) {
1848         intra_pred_mode = candidate[lc->pu.mpm_idx];
1849     } else {
1850         if (candidate[0] > candidate[1])
1851             FFSWAP(uint8_t, candidate[0], candidate[1]);
1852         if (candidate[0] > candidate[2])
1853             FFSWAP(uint8_t, candidate[0], candidate[2]);
1854         if (candidate[1] > candidate[2])
1855             FFSWAP(uint8_t, candidate[1], candidate[2]);
1856
1857         intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
1858         for (i = 0; i < 3; i++)
1859             if (intra_pred_mode >= candidate[i])
1860                 intra_pred_mode++;
1861     }
1862
1863     /* write the intra prediction units into the mv array */
1864     if (!size_in_pus)
1865         size_in_pus = 1;
1866     for (i = 0; i < size_in_pus; i++) {
1867         memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
1868                intra_pred_mode, size_in_pus);
1869
1870         for (j = 0; j < size_in_pus; j++) {
1871             tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
1872         }
1873     }
1874
1875     return intra_pred_mode;
1876 }
1877
1878 static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
1879                                           int log2_cb_size, int ct_depth)
1880 {
1881     int length = (1 << log2_cb_size) >> s->sps->log2_min_cb_size;
1882     int x_cb   = x0 >> s->sps->log2_min_cb_size;
1883     int y_cb   = y0 >> s->sps->log2_min_cb_size;
1884     int y;
1885
1886     for (y = 0; y < length; y++)
1887         memset(&s->tab_ct_depth[(y_cb + y) * s->sps->min_cb_width + x_cb],
1888                ct_depth, length);
1889 }
1890
/* Remapping of a chroma intra prediction mode (index 0..34), applied by
 * intra_prediction_unit() when chroma_format_idc is 2. */
static const uint8_t tab_mode_idx[] = {
     0,  1,  2,  2,  2,  2,  3,  5,  7,  8,   /*  0 ..  9 */
    10, 12, 13, 15, 17, 18, 19, 20, 21, 22,   /* 10 .. 19 */
    23, 23, 24, 24, 25, 25, 26, 27, 27, 28,   /* 20 .. 29 */
    28, 29, 29, 30, 31,                       /* 30 .. 34 */
};
1894
1895 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
1896                                   int log2_cb_size)
1897 {
1898     HEVCLocalContext *lc = s->HEVClc;
1899     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
1900     uint8_t prev_intra_luma_pred_flag[4];
1901     int split   = lc->cu.part_mode == PART_NxN;
1902     int pb_size = (1 << log2_cb_size) >> split;
1903     int side    = split + 1;
1904     int chroma_mode;
1905     int i, j;
1906
1907     for (i = 0; i < side; i++)
1908         for (j = 0; j < side; j++)
1909             prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_prev_intra_luma_pred_flag_decode(s);
1910
1911     for (i = 0; i < side; i++) {
1912         for (j = 0; j < side; j++) {
1913             if (prev_intra_luma_pred_flag[2 * i + j])
1914                 lc->pu.mpm_idx = ff_hevc_mpm_idx_decode(s);
1915             else
1916                 lc->pu.rem_intra_luma_pred_mode = ff_hevc_rem_intra_luma_pred_mode_decode(s);
1917
1918             lc->pu.intra_pred_mode[2 * i + j] =
1919                 luma_intra_pred_mode(s, x0 + pb_size * j, y0 + pb_size * i, pb_size,
1920                                      prev_intra_luma_pred_flag[2 * i + j]);
1921         }
1922     }
1923
1924     if (s->sps->chroma_format_idc == 3) {
1925         for (i = 0; i < side; i++) {
1926             for (j = 0; j < side; j++) {
1927                 lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1928                 if (chroma_mode != 4) {
1929                     if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
1930                         lc->pu.intra_pred_mode_c[2 * i + j] = 34;
1931                     else
1932                         lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
1933                 } else {
1934                     lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
1935                 }
1936             }
1937         }
1938     } else if (s->sps->chroma_format_idc == 2) {
1939         int mode_idx;
1940         lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1941         if (chroma_mode != 4) {
1942             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1943                 mode_idx = 34;
1944             else
1945                 mode_idx = intra_chroma_table[chroma_mode];
1946         } else {
1947             mode_idx = lc->pu.intra_pred_mode[0];
1948         }
1949         lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
1950     } else if (s->sps->chroma_format_idc != 0) {
1951         chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
1952         if (chroma_mode != 4) {
1953             if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
1954                 lc->pu.intra_pred_mode_c[0] = 34;
1955             else
1956                 lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
1957         } else {
1958             lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
1959         }
1960     }
1961 }
1962
1963 static void intra_prediction_unit_default_value(HEVCContext *s,
1964                                                 int x0, int y0,
1965                                                 int log2_cb_size)
1966 {
1967     HEVCLocalContext *lc = s->HEVClc;
1968     int pb_size          = 1 << log2_cb_size;
1969     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
1970     int min_pu_width     = s->sps->min_pu_width;
1971     MvField *tab_mvf     = s->ref->tab_mvf;
1972     int x_pu             = x0 >> s->sps->log2_min_pu_size;
1973     int y_pu             = y0 >> s->sps->log2_min_pu_size;
1974     int j, k;
1975
1976     if (size_in_pus == 0)
1977         size_in_pus = 1;
1978     for (j = 0; j < size_in_pus; j++)
1979         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
1980     if (lc->cu.pred_mode == MODE_INTRA)
1981         for (j = 0; j < size_in_pus; j++)
1982             for (k = 0; k < size_in_pus; k++)
1983                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
1984 }
1985
1986 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
1987 {
1988     int cb_size          = 1 << log2_cb_size;
1989     HEVCLocalContext *lc = s->HEVClc;
1990     int log2_min_cb_size = s->sps->log2_min_cb_size;
1991     int length           = cb_size >> log2_min_cb_size;
1992     int min_cb_width     = s->sps->min_cb_width;
1993     int x_cb             = x0 >> log2_min_cb_size;
1994     int y_cb             = y0 >> log2_min_cb_size;
1995     int idx              = log2_cb_size - 2;
1996     int qp_block_mask    = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
1997     int x, y, ret;
1998
1999     lc->cu.x                = x0;
2000     lc->cu.y                = y0;
2001     lc->cu.pred_mode        = MODE_INTRA;
2002     lc->cu.part_mode        = PART_2Nx2N;
2003     lc->cu.intra_split_flag = 0;
2004
2005     SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
2006     for (x = 0; x < 4; x++)
2007         lc->pu.intra_pred_mode[x] = 1;
2008     if (s->pps->transquant_bypass_enable_flag) {
2009         lc->cu.cu_transquant_bypass_flag = ff_hevc_cu_transquant_bypass_flag_decode(s);
2010         if (lc->cu.cu_transquant_bypass_flag)
2011             set_deblocking_bypass(s, x0, y0, log2_cb_size);
2012     } else
2013         lc->cu.cu_transquant_bypass_flag = 0;
2014
2015     if (s->sh.slice_type != I_SLICE) {
2016         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
2017
2018         x = y_cb * min_cb_width + x_cb;
2019         for (y = 0; y < length; y++) {
2020             memset(&s->skip_flag[x], skip_flag, length);
2021             x += min_cb_width;
2022         }
2023         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
2024     } else {
2025         x = y_cb * min_cb_width + x_cb;
2026         for (y = 0; y < length; y++) {
2027             memset(&s->skip_flag[x], 0, length);
2028             x += min_cb_width;
2029         }
2030     }
2031
2032     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
2033         hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2034         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2035
2036         if (!s->sh.disable_deblocking_filter_flag)
2037             ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2038     } else {
2039         int pcm_flag = 0;
2040
2041         if (s->sh.slice_type != I_SLICE)
2042             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
2043         if (lc->cu.pred_mode != MODE_INTRA ||
2044             log2_cb_size == s->sps->log2_min_cb_size) {
2045             lc->cu.part_mode        = ff_hevc_part_mode_decode(s, log2_cb_size);
2046             lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
2047                                       lc->cu.pred_mode == MODE_INTRA;
2048         }
2049
2050         if (lc->cu.pred_mode == MODE_INTRA) {
2051             if (lc->cu.part_mode == PART_2Nx2N && s->sps->pcm_enabled_flag &&
2052                 log2_cb_size >= s->sps->pcm.log2_min_pcm_cb_size &&
2053                 log2_cb_size <= s->sps->pcm.log2_max_pcm_cb_size) {
2054                 pcm_flag = ff_hevc_pcm_flag_decode(s);
2055             }
2056             if (pcm_flag) {
2057                 intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2058                 ret = hls_pcm_sample(s, x0, y0, log2_cb_size);
2059                 if (s->sps->pcm.loop_filter_disable_flag)
2060                     set_deblocking_bypass(s, x0, y0, log2_cb_size);
2061
2062                 if (ret < 0)
2063                     return ret;
2064             } else {
2065                 intra_prediction_unit(s, x0, y0, log2_cb_size);
2066             }
2067         } else {
2068             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
2069             switch (lc->cu.part_mode) {
2070             case PART_2Nx2N:
2071                 hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
2072                 break;
2073             case PART_2NxN:
2074                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0, idx);
2075                 hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
2076                 break;
2077             case PART_Nx2N:
2078                 hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
2079                 hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
2080                 break;
2081             case PART_2NxnU:
2082                 hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0, idx);
2083                 hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
2084                 break;
2085             case PART_2NxnD:
2086                 hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
2087                 hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1, idx);
2088                 break;
2089             case PART_nLx2N:
2090                 hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0, idx - 2);
2091                 hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
2092                 break;
2093             case PART_nRx2N:
2094                 hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
2095                 hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1, idx - 2);
2096                 break;
2097             case PART_NxN:
2098                 hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
2099                 hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
2100                 hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
2101                 hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
2102                 break;
2103             }
2104         }
2105
2106         if (!pcm_flag) {
2107             int rqt_root_cbf = 1;
2108
2109             if (lc->cu.pred_mode != MODE_INTRA &&
2110                 !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
2111                 rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
2112             }
2113             if (rqt_root_cbf) {
2114                 const static int cbf[2] = { 0 };
2115                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
2116                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
2117                                          s->sps->max_transform_hierarchy_depth_inter;
2118                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
2119                                          log2_cb_size,
2120                                          log2_cb_size, 0, 0, cbf, cbf);
2121                 if (ret < 0)
2122                     return ret;
2123             } else {
2124                 if (!s->sh.disable_deblocking_filter_flag)
2125                     ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
2126             }
2127         }
2128     }
2129
2130     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
2131         ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
2132
2133     x = y_cb * min_cb_width + x_cb;
2134     for (y = 0; y < length; y++) {
2135         memset(&s->qp_y_tab[x], lc->qp_y, length);
2136         x += min_cb_width;
2137     }
2138
2139     if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2140        ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
2141         lc->qPy_pred = lc->qp_y;
2142     }
2143
2144     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
2145
2146     return 0;
2147 }
2148
2149 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
2150                                int log2_cb_size, int cb_depth)
2151 {
2152     HEVCLocalContext *lc = s->HEVClc;
2153     const int cb_size    = 1 << log2_cb_size;
2154     int ret;
2155     int split_cu;
2156
2157     lc->ct_depth = cb_depth;
2158     if (x0 + cb_size <= s->sps->width  &&
2159         y0 + cb_size <= s->sps->height &&
2160         log2_cb_size > s->sps->log2_min_cb_size) {
2161         split_cu = ff_hevc_split_coding_unit_flag_decode(s, cb_depth, x0, y0);
2162     } else {
2163         split_cu = (log2_cb_size > s->sps->log2_min_cb_size);
2164     }
2165     if (s->pps->cu_qp_delta_enabled_flag &&
2166         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth) {
2167         lc->tu.is_cu_qp_delta_coded = 0;
2168         lc->tu.cu_qp_delta          = 0;
2169     }
2170
2171     if (s->sh.cu_chroma_qp_offset_enabled_flag &&
2172         log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_chroma_qp_offset_depth) {
2173         lc->tu.is_cu_chroma_qp_offset_coded = 0;
2174     }
2175
2176     if (split_cu) {
2177         int qp_block_mask = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
2178         const int cb_size_split = cb_size >> 1;
2179         const int x1 = x0 + cb_size_split;
2180         const int y1 = y0 + cb_size_split;
2181
2182         int more_data = 0;
2183
2184         more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
2185         if (more_data < 0)
2186             return more_data;
2187
2188         if (more_data && x1 < s->sps->width) {
2189             more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
2190             if (more_data < 0)
2191                 return more_data;
2192         }
2193         if (more_data && y1 < s->sps->height) {
2194             more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
2195             if (more_data < 0)
2196                 return more_data;
2197         }
2198         if (more_data && x1 < s->sps->width &&
2199             y1 < s->sps->height) {
2200             more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
2201             if (more_data < 0)
2202                 return more_data;
2203         }
2204
2205         if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
2206             ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
2207             lc->qPy_pred = lc->qp_y;
2208
2209         if (more_data)
2210             return ((x1 + cb_size_split) < s->sps->width ||
2211                     (y1 + cb_size_split) < s->sps->height);
2212         else
2213             return 0;
2214     } else {
2215         ret = hls_coding_unit(s, x0, y0, log2_cb_size);
2216         if (ret < 0)
2217             return ret;
2218         if ((!((x0 + cb_size) %
2219                (1 << (s->sps->log2_ctb_size))) ||
2220              (x0 + cb_size >= s->sps->width)) &&
2221             (!((y0 + cb_size) %
2222                (1 << (s->sps->log2_ctb_size))) ||
2223              (y0 + cb_size >= s->sps->height))) {
2224             int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
2225             return !end_of_slice_flag;
2226         } else {
2227             return 1;
2228         }
2229     }
2230
2231     return 0;
2232 }
2233
2234 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
2235                                  int ctb_addr_ts)
2236 {
2237     HEVCLocalContext *lc  = s->HEVClc;
2238     int ctb_size          = 1 << s->sps->log2_ctb_size;
2239     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2240     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
2241
2242     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
2243
2244     if (s->pps->entropy_coding_sync_enabled_flag) {
2245         if (x_ctb == 0 && (y_ctb & (ctb_size - 1)) == 0)
2246             lc->first_qp_group = 1;
2247         lc->end_of_tiles_x = s->sps->width;
2248     } else if (s->pps->tiles_enabled_flag) {
2249         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
2250             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
2251             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
2252             lc->first_qp_group   = 1;
2253         }
2254     } else {
2255         lc->end_of_tiles_x = s->sps->width;
2256     }
2257
2258     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
2259
2260     lc->boundary_flags = 0;
2261     if (s->pps->tiles_enabled_flag) {
2262         if (x_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]])
2263             lc->boundary_flags |= BOUNDARY_LEFT_TILE;
2264         if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
2265             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2266         if (y_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]])
2267             lc->boundary_flags |= BOUNDARY_UPPER_TILE;
2268         if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width])
2269             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2270     } else {
2271         if (ctb_addr_in_slice <= 0)
2272             lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
2273         if (ctb_addr_in_slice < s->sps->ctb_width)
2274             lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
2275     }
2276
2277     lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
2278     lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
2279     lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
2280     lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
2281 }
2282
2283 static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
2284 {
2285     HEVCContext *s  = avctxt->priv_data;
2286     int ctb_size    = 1 << s->sps->log2_ctb_size;
2287     int more_data   = 1;
2288     int x_ctb       = 0;
2289     int y_ctb       = 0;
2290     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
2291
2292     if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
2293         av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
2294         return AVERROR_INVALIDDATA;
2295     }
2296
2297     if (s->sh.dependent_slice_segment_flag) {
2298         int prev_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
2299         if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
2300             av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
2301             return AVERROR_INVALIDDATA;
2302         }
2303     }
2304
2305     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
2306         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2307
2308         x_ctb = (ctb_addr_rs % ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2309         y_ctb = (ctb_addr_rs / ((s->sps->width + ctb_size - 1) >> s->sps->log2_ctb_size)) << s->sps->log2_ctb_size;
2310         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2311
2312         ff_hevc_cabac_init(s, ctb_addr_ts);
2313
2314         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2315
2316         s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
2317         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
2318         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
2319
2320         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2321         if (more_data < 0) {
2322             s->tab_slice_address[ctb_addr_rs] = -1;
2323             return more_data;
2324         }
2325
2326
2327         ctb_addr_ts++;
2328         ff_hevc_save_states(s, ctb_addr_ts);
2329         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2330     }
2331
2332     if (x_ctb + ctb_size >= s->sps->width &&
2333         y_ctb + ctb_size >= s->sps->height)
2334         ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2335
2336     return ctb_addr_ts;
2337 }
2338
2339 static int hls_slice_data(HEVCContext *s)
2340 {
2341     int arg[2];
2342     int ret[2];
2343
2344     arg[0] = 0;
2345     arg[1] = 1;
2346
2347     s->avctx->execute(s->avctx, hls_decode_entry, arg, ret , 1, sizeof(int));
2348     return ret[0];
2349 }
2350 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
2351 {
2352     HEVCContext *s1  = avctxt->priv_data, *s;
2353     HEVCLocalContext *lc;
2354     int ctb_size    = 1<< s1->sps->log2_ctb_size;
2355     int more_data   = 1;
2356     int *ctb_row_p    = input_ctb_row;
2357     int ctb_row = ctb_row_p[job];
2358     int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->sps->width + ctb_size - 1) >> s1->sps->log2_ctb_size);
2359     int ctb_addr_ts = s1->pps->ctb_addr_rs_to_ts[ctb_addr_rs];
2360     int thread = ctb_row % s1->threads_number;
2361     int ret;
2362
2363     s = s1->sList[self_id];
2364     lc = s->HEVClc;
2365
2366     if(ctb_row) {
2367         ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
2368
2369         if (ret < 0)
2370             return ret;
2371         ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
2372     }
2373
2374     while(more_data && ctb_addr_ts < s->sps->ctb_size) {
2375         int x_ctb = (ctb_addr_rs % s->sps->ctb_width) << s->sps->log2_ctb_size;
2376         int y_ctb = (ctb_addr_rs / s->sps->ctb_width) << s->sps->log2_ctb_size;
2377
2378         hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
2379
2380         ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
2381
2382         if (avpriv_atomic_int_get(&s1->wpp_err)){
2383             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2384             return 0;
2385         }
2386
2387         ff_hevc_cabac_init(s, ctb_addr_ts);
2388         hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
2389         more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
2390
2391         if (more_data < 0) {
2392             s->tab_slice_address[ctb_addr_rs] = -1;
2393             return more_data;
2394         }
2395
2396         ctb_addr_ts++;
2397
2398         ff_hevc_save_states(s, ctb_addr_ts);
2399         ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
2400         ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
2401
2402         if (!more_data && (x_ctb+ctb_size) < s->sps->width && ctb_row != s->sh.num_entry_point_offsets) {
2403             avpriv_atomic_int_set(&s1->wpp_err,  1);
2404             ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2405             return 0;
2406         }
2407
2408         if ((x_ctb+ctb_size) >= s->sps->width && (y_ctb+ctb_size) >= s->sps->height ) {
2409             ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
2410             ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
2411             return ctb_addr_ts;
2412         }
2413         ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
2414         x_ctb+=ctb_size;
2415
2416         if(x_ctb >= s->sps->width) {
2417             break;
2418         }
2419     }
2420     ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
2421
2422     return 0;
2423 }
2424
2425 static int hls_slice_data_wpp(HEVCContext *s, const uint8_t *nal, int length)
2426 {
2427     HEVCLocalContext *lc = s->HEVClc;
2428     int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2429     int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
2430     int offset;
2431     int startheader, cmpt = 0;
2432     int i, j, res = 0;
2433
2434     if (!ret || !arg) {
2435         av_free(ret);
2436         av_free(arg);
2437         return AVERROR(ENOMEM);
2438     }
2439
2440
2441     if (!s->sList[1]) {
2442         ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
2443
2444
2445         for (i = 1; i < s->threads_number; i++) {
2446             s->sList[i] = av_malloc(sizeof(HEVCContext));
2447             memcpy(s->sList[i], s, sizeof(HEVCContext));
2448             s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext));
2449             s->sList[i]->HEVClc = s->HEVClcList[i];
2450         }
2451     }
2452
2453     offset = (lc->gb.index >> 3);
2454
2455     for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < s->skipped_bytes; j++) {
2456         if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2457             startheader--;
2458             cmpt++;
2459         }
2460     }
2461
2462     for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
2463         offset += (s->sh.entry_point_offset[i - 1] - cmpt);
2464         for (j = 0, cmpt = 0, startheader = offset
2465              + s->sh.entry_point_offset[i]; j < s->skipped_bytes; j++) {
2466             if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
2467                 startheader--;
2468                 cmpt++;
2469             }
2470         }
2471         s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
2472         s->sh.offset[i - 1] = offset;
2473
2474     }
2475     if (s->sh.num_entry_point_offsets != 0) {
2476         offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
2477         s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
2478         s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
2479
2480     }
2481     s->data = nal;
2482
2483     for (i = 1; i < s->threads_number; i++) {
2484         s->sList[i]->HEVClc->first_qp_group = 1;
2485         s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
2486         memcpy(s->sList[i], s, sizeof(HEVCContext));
2487         s->sList[i]->HEVClc = s->HEVClcList[i];
2488     }
2489
2490     avpriv_atomic_int_set(&s->wpp_err, 0);
2491     ff_reset_entries(s->avctx);
2492
2493     for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
2494         arg[i] = i;
2495         ret[i] = 0;
2496     }
2497
2498     if (s->pps->entropy_coding_sync_enabled_flag)
2499         s->avctx->execute2(s->avctx, (void *) hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
2500
2501     for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
2502         res += ret[i];
2503     av_free(ret);
2504     av_free(arg);
2505     return res;
2506 }
2507
2508 /**
2509  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
2510  * 0 if the unit should be skipped, 1 otherwise
2511  */
2512 static int hls_nal_unit(HEVCContext *s)
2513 {
2514     GetBitContext *gb = &s->HEVClc->gb;
2515     int nuh_layer_id;
2516
2517     if (get_bits1(gb) != 0)
2518         return AVERROR_INVALIDDATA;
2519
2520     s->nal_unit_type = get_bits(gb, 6);
2521
2522     nuh_layer_id   = get_bits(gb, 6);
2523     s->temporal_id = get_bits(gb, 3) - 1;
2524     if (s->temporal_id < 0)
2525         return AVERROR_INVALIDDATA;
2526
2527     av_log(s->avctx, AV_LOG_DEBUG,
2528            "nal_unit_type: %d, nuh_layer_id: %d, temporal_id: %d\n",
2529            s->nal_unit_type, nuh_layer_id, s->temporal_id);
2530
2531     return nuh_layer_id == 0;
2532 }
2533
2534 static int set_side_data(HEVCContext *s)
2535 {
2536     AVFrame *out = s->ref->frame;
2537
2538     if (s->sei_frame_packing_present &&
2539         s->frame_packing_arrangement_type >= 3 &&
2540         s->frame_packing_arrangement_type <= 5 &&
2541         s->content_interpretation_type > 0 &&
2542         s->content_interpretation_type < 3) {
2543         AVStereo3D *stereo = av_stereo3d_create_side_data(out);
2544         if (!stereo)
2545             return AVERROR(ENOMEM);
2546
2547         switch (s->frame_packing_arrangement_type) {
2548         case 3:
2549             if (s->quincunx_subsampling)
2550                 stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
2551             else
2552                 stereo->type = AV_STEREO3D_SIDEBYSIDE;
2553             break;
2554         case 4:
2555             stereo->type = AV_STEREO3D_TOPBOTTOM;
2556             break;
2557         case 5:
2558             stereo->type = AV_STEREO3D_FRAMESEQUENCE;
2559             break;
2560         }
2561
2562         if (s->content_interpretation_type == 2)
2563             stereo->flags = AV_STEREO3D_FLAG_INVERT;
2564     }
2565
2566     if (s->sei_display_orientation_present &&
2567         (s->sei_anticlockwise_rotation || s->sei_hflip || s->sei_vflip)) {
2568         double angle = s->sei_anticlockwise_rotation * 360 / (double) (1 << 16);
2569         AVFrameSideData *rotation = av_frame_new_side_data(out,
2570                                                            AV_FRAME_DATA_DISPLAYMATRIX,
2571                                                            sizeof(int32_t) * 9);
2572         if (!rotation)
2573             return AVERROR(ENOMEM);
2574
2575         av_display_rotation_set((int32_t *)rotation->data, angle);
2576         av_display_matrix_flip((int32_t *)rotation->data,
2577                                s->sei_hflip, s->sei_vflip);
2578     }
2579
2580     return 0;
2581 }
2582
2583 static int hevc_frame_start(HEVCContext *s)
2584 {
2585     HEVCLocalContext *lc = s->HEVClc;
2586     int pic_size_in_ctb  = ((s->sps->width  >> s->sps->log2_min_cb_size) + 1) *
2587                            ((s->sps->height >> s->sps->log2_min_cb_size) + 1);
2588     int ret;
2589
2590     memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
2591     memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
2592     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
2593     memset(s->is_pcm,        0, (s->sps->min_pu_width + 1) * (s->sps->min_pu_height + 1));
2594     memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
2595
2596     s->is_decoded        = 0;
2597     s->first_nal_type    = s->nal_unit_type;
2598
2599     if (s->pps->tiles_enabled_flag)
2600         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
2601
2602     ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
2603     if (ret < 0)
2604         goto fail;
2605
2606     ret = ff_hevc_frame_rps(s);
2607     if (ret < 0) {
2608         av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
2609         goto fail;
2610     }
2611
2612     s->ref->frame->key_frame = IS_IRAP(s);
2613
2614     ret = set_side_data(s);
2615     if (ret < 0)
2616         goto fail;
2617
2618     s->frame->pict_type = 3 - s->sh.slice_type;
2619
2620     if (!IS_IRAP(s))
2621         ff_hevc_bump_frame(s);
2622
2623     av_frame_unref(s->output_frame);
2624     ret = ff_hevc_output_frame(s, s->output_frame, 0);
2625     if (ret < 0)
2626         goto fail;
2627
2628     if (!s->avctx->hwaccel)
2629         ff_thread_finish_setup(s->avctx);
2630
2631     return 0;
2632
2633 fail:
2634     if (s->ref)
2635         ff_hevc_unref_frame(s, s->ref, ~0);
2636     s->ref = NULL;
2637     return ret;
2638 }
2639
2640 static int decode_nal_unit(HEVCContext *s, const HEVCNAL *nal)
2641 {
2642     HEVCLocalContext *lc = s->HEVClc;
2643     GetBitContext *gb    = &lc->gb;
2644     int ctb_addr_ts, ret;
2645
2646     ret = init_get_bits8(gb, nal->data, nal->size);
2647     if (ret < 0)
2648         return ret;
2649
2650     ret = hls_nal_unit(s);
2651     if (ret < 0) {
2652         av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit %d, skipping.\n",
2653                s->nal_unit_type);
2654         goto fail;
2655     } else if (!ret)
2656         return 0;
2657
2658     switch (s->nal_unit_type) {
2659     case NAL_VPS:
2660         ret = ff_hevc_decode_nal_vps(s);
2661         if (ret < 0)
2662             goto fail;
2663         break;
2664     case NAL_SPS:
2665         ret = ff_hevc_decode_nal_sps(s);
2666         if (ret < 0)
2667             goto fail;
2668         break;
2669     case NAL_PPS:
2670         ret = ff_hevc_decode_nal_pps(s);
2671         if (ret < 0)
2672             goto fail;
2673         break;
2674     case NAL_SEI_PREFIX:
2675     case NAL_SEI_SUFFIX:
2676         ret = ff_hevc_decode_nal_sei(s);
2677         if (ret < 0)
2678             goto fail;
2679         break;
2680     case NAL_TRAIL_R:
2681     case NAL_TRAIL_N:
2682     case NAL_TSA_N:
2683     case NAL_TSA_R:
2684     case NAL_STSA_N:
2685     case NAL_STSA_R:
2686     case NAL_BLA_W_LP:
2687     case NAL_BLA_W_RADL:
2688     case NAL_BLA_N_LP:
2689     case NAL_IDR_W_RADL:
2690     case NAL_IDR_N_LP:
2691     case NAL_CRA_NUT:
2692     case NAL_RADL_N:
2693     case NAL_RADL_R:
2694     case NAL_RASL_N:
2695     case NAL_RASL_R:
2696         ret = hls_slice_header(s);
2697         if (ret < 0)
2698             return ret;
2699
2700         if (s->max_ra == INT_MAX) {
2701             if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) {
2702                 s->max_ra = s->poc;
2703             } else {
2704                 if (IS_IDR(s))
2705                     s->max_ra = INT_MIN;
2706             }
2707         }
2708
2709         if ((s->nal_unit_type == NAL_RASL_R || s->nal_unit_type == NAL_RASL_N) &&
2710             s->poc <= s->max_ra) {
2711             s->is_decoded = 0;
2712             break;
2713         } else {
2714             if (s->nal_unit_type == NAL_RASL_R && s->poc > s->max_ra)
2715                 s->max_ra = INT_MIN;
2716         }
2717
2718         if (s->sh.first_slice_in_pic_flag) {
2719             ret = hevc_frame_start(s);
2720             if (ret < 0)
2721                 return ret;
2722         } else if (!s->ref) {
2723             av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
2724             goto fail;
2725         }
2726
2727         if (s->nal_unit_type != s->first_nal_type) {
2728             av_log(s->avctx, AV_LOG_ERROR,
2729                    "Non-matching NAL types of the VCL NALUs: %d %d\n",
2730                    s->first_nal_type, s->nal_unit_type);
2731             return AVERROR_INVALIDDATA;
2732         }
2733
2734         if (!s->sh.dependent_slice_segment_flag &&
2735             s->sh.slice_type != I_SLICE) {
2736             ret = ff_hevc_slice_rpl(s);
2737             if (ret < 0) {
2738                 av_log(s->avctx, AV_LOG_WARNING,
2739                        "Error constructing the reference lists for the current slice.\n");
2740                 goto fail;
2741             }
2742         }
2743
2744         if (s->sh.first_slice_in_pic_flag && s->avctx->hwaccel) {
2745             ret = s->avctx->hwaccel->start_frame(s->avctx, NULL, 0);
2746             if (ret < 0)
2747                 goto fail;
2748         }
2749
2750         if (s->avctx->hwaccel) {
2751             ret = s->avctx->hwaccel->decode_slice(s->avctx, nal->raw_data, nal->raw_size);
2752             if (ret < 0)
2753                 goto fail;
2754         } else {
2755             if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
2756                 ctb_addr_ts = hls_slice_data_wpp(s, nal->data, nal->size);
2757             else
2758                 ctb_addr_ts = hls_slice_data(s);
2759             if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
2760                 s->is_decoded = 1;
2761             }
2762
2763             if (ctb_addr_ts < 0) {
2764                 ret = ctb_addr_ts;
2765                 goto fail;
2766             }
2767         }
2768         break;
2769     case NAL_EOS_NUT:
2770     case NAL_EOB_NUT:
2771         s->seq_decode = (s->seq_decode + 1) & 0xff;
2772         s->max_ra     = INT_MAX;
2773         break;
2774     case NAL_AUD:
2775     case NAL_FD_NUT:
2776         break;
2777     default:
2778         av_log(s->avctx, AV_LOG_INFO,
2779                "Skipping NAL unit %d\n", s->nal_unit_type);
2780     }
2781
2782     return 0;
2783 fail:
2784     if (s->avctx->err_recognition & AV_EF_EXPLODE)
2785         return ret;
2786     return 0;
2787 }
2788
/* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
 * between these functions would be nice. */
/**
 * Extract one NAL unit payload from src, dropping the 0x03 byte of every
 * 00 00 03 escape sequence.
 *
 * The input is first scanned for the earliest 00 00 0x sequence (x <= 3).
 * If none is found, nothing is copied: nal->data and nal->raw_data both
 * point at src. Otherwise the payload is rebuilt in nal->rbsp_buffer, and
 * the output offset of each removed escape is recorded in
 * s->skipped_bytes_pos (counted in s->skipped_bytes).
 *
 * @param s      decoder context; s->skipped_bytes is reset to 0 here and
 *               s->skipped_bytes_pos / s->skipped_bytes_pos_size may be
 *               reallocated / doubled
 * @param src    start of the NAL unit bytes
 * @param length number of bytes available at src
 * @param nal    receives data/size (unescaped) and raw_data/raw_size
 * @return number of bytes consumed from src, or AVERROR(ENOMEM) if a
 *         buffer could not be allocated
 */
int ff_hevc_extract_rbsp(HEVCContext *s, const uint8_t *src, int length,
                         HEVCNAL *nal)
{
    int i, si, di;
    uint8_t *dst;

    s->skipped_bytes = 0;
/* At a zero byte src[i]: if it begins 00 00 0x with x <= 3, stop scanning.
 * When x != 3 the sequence is treated as a start code and length is cut
 * down to i, so the NAL ends right before it. */
#define STARTCODE_TEST                                                  \
        if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
            if (src[i + 2] != 3) {                                      \
                /* startcode, so we must be past the end */             \
                length = i;                                             \
            }                                                           \
            break;                                                      \
        }
#if HAVE_FAST_UNALIGNED
/* Step back one byte if already on a zero, then advance to the next zero
 * byte at or after i. */
#define FIND_FIRST_ZERO                                                 \
        if (i > 0 && !src[i])                                           \
            i--;                                                        \
        while (src[i])                                                  \
            i++
#if HAVE_FAST_64BIT
    /* Word-at-a-time scan: the masked expression looks like the usual
     * "has a zero byte" bit trick restricted to the lanes selected by the
     * mask; words that fail it are skipped.
     * NOTE(review): the word reads can extend past length, so this
     * presumably relies on the input buffer being padded -- confirm. */
    for (i = 0; i + 1 < length; i += 9) {
        if (!((~AV_RN64A(src + i) &
               (AV_RN64A(src + i) - 0x0100010001000101ULL)) &
              0x8000800080008080ULL))
            continue;
        FIND_FIRST_ZERO;
        STARTCODE_TEST;
        i -= 7;
    }
#else
    /* Same scan as above using 32-bit words. */
    for (i = 0; i + 1 < length; i += 5) {
        if (!((~AV_RN32A(src + i) &
               (AV_RN32A(src + i) - 0x01000101U)) &
              0x80008080U))
            continue;
        FIND_FIRST_ZERO;
        STARTCODE_TEST;
        i -= 3;
    }
#endif /* HAVE_FAST_64BIT */
#else
    /* Portable scan: look at every second byte and back up by one when the
     * previous byte is zero as well. */
    for (i = 0; i + 1 < length; i += 2) {
        if (src[i])
            continue;
        if (i > 0 && src[i - 1] == 0)
            i--;
        STARTCODE_TEST;
    }
#endif /* HAVE_FAST_UNALIGNED */

    /* The scan ran to the end without breaking: use the input in place. */
    if (i >= length - 1) { // no escaped 0
        nal->data     =
        nal->raw_data = src;
        nal->size     =
        nal->raw_size = length;
        return length;
    }

    /* The output can never be longer than the input; the extra padding is
     * zeroed at the nsc label below. */
    av_fast_malloc(&nal->rbsp_buffer, &nal->rbsp_buffer_size,
                   length + FF_INPUT_BUFFER_PADDING_SIZE);
    if (!nal->rbsp_buffer)
        return AVERROR(ENOMEM);

    dst = nal->rbsp_buffer;

    /* Everything before the first 00 00 0x sequence is copied verbatim. */
    memcpy(dst, src, i);
    si = di = i;
    while (si + 2 < length) {
        // remove escapes (very rare 1:2^22)
        if (src[si + 2] > 3) {
            /* src[si + 2] cannot end a 00 00 0x sequence, so two bytes are
             * copied here and a third by the statement after the if. */
            dst[di++] = src[si++];
            dst[di++] = src[si++];
        } else if (src[si] == 0 && src[si + 1] == 0) {
            if (src[si + 2] == 3) { // escape
                /* Emit the two zero bytes and skip the 0x03. */
                dst[di++] = 0;
                dst[di++] = 0;
                si       += 3;

                s->skipped_bytes++;
                /* Double the position array when it is full. */
                if (s->skipped_bytes_pos_size < s->skipped_bytes) {
                    s->skipped_bytes_pos_size *= 2;
                    av_reallocp_array(&s->skipped_bytes_pos,
                            s->skipped_bytes_pos_size,
                            sizeof(*s->skipped_bytes_pos));
                    if (!s->skipped_bytes_pos)
                        return AVERROR(ENOMEM);
                }
                /* Record the output offset of the last zero written. */
                if (s->skipped_bytes_pos)
                    s->skipped_bytes_pos[s->skipped_bytes-1] = di - 1;
                continue;
            } else // next start code
                goto nsc;
        }

        dst[di++] = src[si++];
    }
    /* Copy the last (up to two) bytes, which cannot start an escape. */
    while (si < length)
        dst[di++] = src[si++];

nsc:
    memset(dst + di, 0, FF_INPUT_BUFFER_PADDING_SIZE);

    nal->data = dst;
    nal->size = di;
    nal->raw_data = src;
    nal->raw_size = si;
    return si;
}
2901
2902 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
2903 {
2904     int i, consumed, ret = 0;
2905
2906     s->ref = NULL;
2907     s->last_eos = s->eos;
2908     s->eos = 0;
2909
2910     /* split the input packet into NAL units, so we know the upper bound on the
2911      * number of slices in the frame */
2912     s->nb_nals = 0;
2913     while (length >= 4) {
2914         HEVCNAL *nal;
2915         int extract_length = 0;
2916
2917         if (s->is_nalff) {
2918             int i;
2919             for (i = 0; i < s->nal_length_size; i++)
2920                 extract_length = (extract_length << 8) | buf[i];
2921             buf    += s->nal_length_size;
2922             length -= s->nal_length_size;
2923
2924             if (extract_length > length) {
2925                 av_log(s->avctx, AV_LOG_ERROR, "Invalid NAL unit size.\n");
2926                 ret = AVERROR_INVALIDDATA;
2927                 goto fail;
2928             }
2929         } else {
2930             /* search start code */
2931             while (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
2932                 ++buf;
2933                 --length;
2934                 if (length < 4) {
2935                     av_log(s->avctx, AV_LOG_ERROR, "No start code is found.\n");
2936                     ret = AVERROR_INVALIDDATA;
2937                     goto fail;
2938                 }
2939             }
2940
2941             buf           += 3;
2942             length        -= 3;
2943         }
2944
2945         if (!s->is_nalff)
2946             extract_length = length;
2947
2948         if (s->nals_allocated < s->nb_nals + 1) {
2949             int new_size = s->nals_allocated + 1;
2950             void *tmp = av_realloc_array(s->nals, new_size, sizeof(*s->nals));
2951             ret = AVERROR(ENOMEM);
2952             if (!tmp) {
2953                 goto fail;
2954             }
2955             s->nals = tmp;
2956             memset(s->nals + s->nals_allocated, 0,
2957                    (new_size - s->nals_allocated) * sizeof(*s->nals));
2958
2959             tmp = av_realloc_array(s->skipped_bytes_nal, new_size, sizeof(*s->skipped_bytes_nal));
2960             if (!tmp)
2961                 goto fail;
2962             s->skipped_bytes_nal = tmp;
2963
2964             tmp = av_realloc_array(s->skipped_bytes_pos_size_nal, new_size, sizeof(*s->skipped_bytes_pos_size_nal));
2965             if (!tmp)
2966                 goto fail;
2967             s->skipped_bytes_pos_size_nal = tmp;
2968
2969             tmp = av_realloc_array(s->skipped_bytes_pos_nal, new_size, sizeof(*s->skipped_bytes_pos_nal));
2970             if (!tmp)
2971                 goto fail;
2972             s->skipped_bytes_pos_nal = tmp;
2973
2974             s->skipped_bytes_pos_size_nal[s->nals_allocated] = 1024; // initial buffer size
2975             s->skipped_bytes_pos_nal[s->nals_allocated] = av_malloc_array(s->skipped_bytes_pos_size_nal[s->nals_allocated], sizeof(*s->skipped_bytes_pos));
2976             if (!s->skipped_bytes_pos_nal[s->nals_allocated])
2977                 goto fail;
2978             s->nals_allocated = new_size;
2979         }
2980         s->skipped_bytes_pos_size = s->skipped_bytes_pos_size_nal[s->nb_nals];
2981         s->skipped_bytes_pos = s->skipped_bytes_pos_nal[s->nb_nals];
2982         nal = &s->nals[s->nb_nals];
2983
2984         consumed = ff_hevc_extract_rbsp(s, buf, extract_length, nal);
2985
2986         s->skipped_bytes_nal[s->nb_nals] = s->skipped_bytes;
2987         s->skipped_bytes_pos_size_nal[s->nb_nals] = s->skipped_bytes_pos_size;
2988         s->skipped_bytes_pos_nal[s->nb_nals++] = s->skipped_bytes_pos;
2989
2990
2991         if (consumed < 0) {
2992             ret = consumed;
2993             goto fail;
2994         }
2995
2996         ret = init_get_bits8(&s->HEVClc->gb, nal->data, nal->size);
2997         if (ret < 0)
2998             goto fail;
2999         hls_nal_unit(s);
3000
3001         if (s->nal_unit_type == NAL_EOB_NUT ||
3002             s->nal_unit_type == NAL_EOS_NUT)
3003             s->eos = 1;
3004
3005         buf    += consumed;
3006         length -= consumed;
3007     }
3008
3009     /* parse the NAL units */
3010     for (i = 0; i < s->nb_nals; i++) {
3011         s->skipped_bytes = s->skipped_bytes_nal[i];
3012         s->skipped_bytes_pos = s->skipped_bytes_pos_nal[i];
3013
3014         ret = decode_nal_unit(s, &s->nals[i]);
3015         if (ret < 0) {
3016             av_log(s->avctx, AV_LOG_WARNING,
3017                    "Error parsing NAL unit #%d.\n", i);
3018             goto fail;
3019         }
3020     }
3021
3022 fail:
3023     if (s->ref && s->threads_type == FF_THREAD_FRAME)
3024         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
3025
3026     return ret;
3027 }
3028
/* Log the 16 digest bytes as two-digit lowercase hex, with no separator and
 * no trailing newline. */
static void print_md5(void *log_ctx, int level, uint8_t md5[16])
{
    const uint8_t *cur       = md5;
    const uint8_t *const end = md5 + 16;

    while (cur < end)
        av_log(log_ctx, level, "%02"PRIx8, *cur++);
}
3035
3036 static int verify_md5(HEVCContext *s, AVFrame *frame)
3037 {
3038     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
3039     int pixel_shift;
3040     int i, j;
3041
3042     if (!desc)
3043         return AVERROR(EINVAL);
3044
3045     pixel_shift = desc->comp[0].depth_minus1 > 7;
3046
3047     av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
3048            s->poc);
3049
3050     /* the checksums are LE, so we have to byteswap for >8bpp formats
3051      * on BE arches */
3052 #if HAVE_BIGENDIAN
3053     if (pixel_shift && !s->checksum_buf) {
3054         av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
3055                        FFMAX3(frame->linesize[0], frame->linesize[1],
3056                               frame->linesize[2]));
3057         if (!s->checksum_buf)
3058             return AVERROR(ENOMEM);
3059     }
3060 #endif
3061
3062     for (i = 0; frame->data[i]; i++) {
3063         int width  = s->avctx->coded_width;
3064         int height = s->avctx->coded_height;
3065         int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
3066         int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
3067         uint8_t md5[16];
3068
3069         av_md5_init(s->md5_ctx);
3070         for (j = 0; j < h; j++) {
3071             const uint8_t *src = frame->data[i] + j * frame->linesize[i];
3072 #if HAVE_BIGENDIAN
3073             if (pixel_shift) {
3074                 s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
3075                                     (const uint16_t *) src, w);
3076                 src = s->checksum_buf;
3077             }
3078 #endif
3079             av_md5_update(s->md5_ctx, src, w << pixel_shift);
3080         }
3081         av_md5_final(s->md5_ctx, md5);
3082
3083         if (!memcmp(md5, s->md5[i], 16)) {
3084             av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
3085             print_md5(s->avctx, AV_LOG_DEBUG, md5);
3086             av_log   (s->avctx, AV_LOG_DEBUG, "; ");
3087         } else {
3088             av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
3089             print_md5(s->avctx, AV_LOG_ERROR, md5);
3090             av_log   (s->avctx, AV_LOG_ERROR, " != ");
3091             print_md5(s->avctx, AV_LOG_ERROR, s->md5[i]);
3092             av_log   (s->avctx, AV_LOG_ERROR, "\n");
3093             return AVERROR_INVALIDDATA;
3094         }
3095     }
3096
3097     av_log(s->avctx, AV_LOG_DEBUG, "\n");
3098
3099     return 0;
3100 }
3101
3102 static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
3103                              AVPacket *avpkt)
3104 {
3105     int ret;
3106     HEVCContext *s = avctx->priv_data;
3107
3108     if (!avpkt->size) {
3109         ret = ff_hevc_output_frame(s, data, 1);
3110         if (ret < 0)
3111             return ret;
3112
3113         *got_output = ret;
3114         return 0;
3115     }
3116
3117     s->ref = NULL;
3118     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
3119     if (ret < 0)
3120         return ret;
3121
3122     if (avctx->hwaccel) {
3123         if (s->ref && avctx->hwaccel->end_frame(avctx) < 0)
3124             av_log(avctx, AV_LOG_ERROR,
3125                    "hardware accelerator failed to decode picture\n");
3126     } else {
3127         /* verify the SEI checksum */
3128         if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
3129             s->is_md5) {
3130             ret = verify_md5(s, s->ref->frame);
3131             if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {
3132                 ff_hevc_unref_frame(s, s->ref, ~0);
3133                 return ret;
3134             }
3135         }
3136     }
3137     s->is_md5 = 0;
3138
3139     if (s->is_decoded) {
3140         av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
3141         s->is_decoded = 0;
3142     }
3143
3144     if (s->output_frame->buf[0]) {
3145         av_frame_move_ref(data, s->output_frame);
3146         *got_output = 1;
3147     }
3148
3149     return avpkt->size;
3150 }
3151
3152 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
3153 {
3154     int ret;
3155
3156     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
3157     if (ret < 0)
3158         return ret;
3159
3160     dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
3161     if (!dst->tab_mvf_buf)
3162         goto fail;
3163     dst->tab_mvf = src->tab_mvf;
3164
3165     dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
3166     if (!dst->rpl_tab_buf)
3167         goto fail;
3168     dst->rpl_tab = src->rpl_tab;
3169
3170     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
3171     if (!dst->rpl_buf)
3172         goto fail;
3173
3174     dst->poc        = src->poc;
3175     dst->ctb_count  = src->ctb_count;
3176     dst->window     = src->window;
3177     dst->flags      = src->flags;
3178     dst->sequence   = src->sequence;
3179
3180     if (src->hwaccel_picture_private) {
3181         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
3182         if (!dst->hwaccel_priv_buf)
3183             goto fail;
3184         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
3185     }
3186
3187     return 0;
3188 fail:
3189     ff_hevc_unref_frame(s, dst, ~0);
3190     return AVERROR(ENOMEM);
3191 }
3192
3193 static av_cold int hevc_decode_free(AVCodecContext *avctx)
3194 {
3195     HEVCContext       *s = avctx->priv_data;
3196     int i;
3197
3198     pic_arrays_free(s);
3199
3200     av_freep(&s->md5_ctx);
3201
3202     for(i=0; i < s->nals_allocated; i++) {
3203         av_freep(&s->skipped_bytes_pos_nal[i]);
3204     }
3205     av_freep(&s->skipped_bytes_pos_size_nal);
3206     av_freep(&s->skipped_bytes_nal);
3207     av_freep(&s->skipped_bytes_pos_nal);
3208
3209     av_freep(&s->cabac_state);
3210
3211     for (i = 0; i < 3; i++) {
3212         av_freep(&s->sao_pixel_buffer_h[i]);
3213         av_freep(&s->sao_pixel_buffer_v[i]);
3214     }
3215     av_frame_free(&s->output_frame);
3216
3217     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3218         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3219         av_frame_free(&s->DPB[i].frame);
3220     }
3221
3222     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++)
3223         av_buffer_unref(&s->vps_list[i]);
3224     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++)
3225         av_buffer_unref(&s->sps_list[i]);
3226     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
3227         av_buffer_unref(&s->pps_list[i]);
3228     s->sps = NULL;
3229     s->pps = NULL;
3230     s->vps = NULL;
3231
3232     av_freep(&s->sh.entry_point_offset);
3233     av_freep(&s->sh.offset);
3234     av_freep(&s->sh.size);
3235
3236     for (i = 1; i < s->threads_number; i++) {
3237         HEVCLocalContext *lc = s->HEVClcList[i];
3238         if (lc) {
3239             av_freep(&s->HEVClcList[i]);
3240             av_freep(&s->sList[i]);
3241         }
3242     }
3243     if (s->HEVClc == s->HEVClcList[0])
3244         s->HEVClc = NULL;
3245     av_freep(&s->HEVClcList[0]);
3246
3247     for (i = 0; i < s->nals_allocated; i++)
3248         av_freep(&s->nals[i].rbsp_buffer);
3249     av_freep(&s->nals);
3250     s->nals_allocated = 0;
3251
3252     return 0;
3253 }
3254
3255 static av_cold int hevc_init_context(AVCodecContext *avctx)
3256 {
3257     HEVCContext *s = avctx->priv_data;
3258     int i;
3259
3260     s->avctx = avctx;
3261
3262     s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
3263     if (!s->HEVClc)
3264         goto fail;
3265     s->HEVClcList[0] = s->HEVClc;
3266     s->sList[0] = s;
3267
3268     s->cabac_state = av_malloc(HEVC_CONTEXTS);
3269     if (!s->cabac_state)
3270         goto fail;
3271
3272     s->output_frame = av_frame_alloc();
3273     if (!s->output_frame)
3274         goto fail;
3275
3276     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3277         s->DPB[i].frame = av_frame_alloc();
3278         if (!s->DPB[i].frame)
3279             goto fail;
3280         s->DPB[i].tf.f = s->DPB[i].frame;
3281     }
3282
3283     s->max_ra = INT_MAX;
3284
3285     s->md5_ctx = av_md5_alloc();
3286     if (!s->md5_ctx)
3287         goto fail;
3288
3289     ff_bswapdsp_init(&s->bdsp);
3290
3291     s->context_initialized = 1;
3292     s->eos = 0;
3293
3294     return 0;
3295
3296 fail:
3297     hevc_decode_free(avctx);
3298     return AVERROR(ENOMEM);
3299 }
3300
3301 static int hevc_update_thread_context(AVCodecContext *dst,
3302                                       const AVCodecContext *src)
3303 {
3304     HEVCContext *s  = dst->priv_data;
3305     HEVCContext *s0 = src->priv_data;
3306     int i, ret;
3307
3308     if (!s->context_initialized) {
3309         ret = hevc_init_context(dst);
3310         if (ret < 0)
3311             return ret;
3312     }
3313
3314     for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
3315         ff_hevc_unref_frame(s, &s->DPB[i], ~0);
3316         if (s0->DPB[i].frame->buf[0]) {
3317             ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
3318             if (ret < 0)
3319                 return ret;
3320         }
3321     }
3322
3323     if (s->sps != s0->sps)
3324         s->sps = NULL;
3325     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
3326         av_buffer_unref(&s->vps_list[i]);
3327         if (s0->vps_list[i]) {
3328             s->vps_list[i] = av_buffer_ref(s0->vps_list[i]);
3329             if (!s->vps_list[i])
3330                 return AVERROR(ENOMEM);
3331         }
3332     }
3333
3334     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3335         av_buffer_unref(&s->sps_list[i]);
3336         if (s0->sps_list[i]) {
3337             s->sps_list[i] = av_buffer_ref(s0->sps_list[i]);
3338             if (!s->sps_list[i])
3339                 return AVERROR(ENOMEM);
3340         }
3341     }
3342
3343     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++) {
3344         av_buffer_unref(&s->pps_list[i]);
3345         if (s0->pps_list[i]) {
3346             s->pps_list[i] = av_buffer_ref(s0->pps_list[i]);
3347             if (!s->pps_list[i])
3348                 return AVERROR(ENOMEM);
3349         }
3350     }
3351
3352     if (s->sps != s0->sps)
3353         if ((ret = set_sps(s, s0->sps, src->pix_fmt)) < 0)
3354             return ret;
3355
3356     s->seq_decode = s0->seq_decode;
3357     s->seq_output = s0->seq_output;
3358     s->pocTid0    = s0->pocTid0;
3359     s->max_ra     = s0->max_ra;
3360     s->eos        = s0->eos;
3361
3362     s->is_nalff        = s0->is_nalff;
3363     s->nal_length_size = s0->nal_length_size;
3364
3365     s->threads_number      = s0->threads_number;
3366     s->threads_type        = s0->threads_type;
3367
3368     if (s0->eos) {
3369         s->seq_decode = (s->seq_decode + 1) & 0xff;
3370         s->max_ra = INT_MAX;
3371     }
3372
3373     return 0;
3374 }
3375
3376 static int hevc_decode_extradata(HEVCContext *s)
3377 {
3378     AVCodecContext *avctx = s->avctx;
3379     GetByteContext gb;
3380     int ret, i;
3381
3382     bytestream2_init(&gb, avctx->extradata, avctx->extradata_size);
3383
3384     if (avctx->extradata_size > 3 &&
3385         (avctx->extradata[0] || avctx->extradata[1] ||
3386          avctx->extradata[2] > 1)) {
3387         /* It seems the extradata is encoded as hvcC format.
3388          * Temporarily, we support configurationVersion==0 until 14496-15 3rd
3389          * is finalized. When finalized, configurationVersion will be 1 and we
3390          * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
3391         int i, j, num_arrays, nal_len_size;
3392
3393         s->is_nalff = 1;
3394
3395         bytestream2_skip(&gb, 21);
3396         nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;
3397         num_arrays   = bytestream2_get_byte(&gb);
3398
3399         /* nal units in the hvcC always have length coded with 2 bytes,
3400          * so put a fake nal_length_size = 2 while parsing them */
3401         s->nal_length_size = 2;
3402
3403         /* Decode nal units from hvcC. */
3404         for (i = 0; i < num_arrays; i++) {
3405             int type = bytestream2_get_byte(&gb) & 0x3f;
3406             int cnt  = bytestream2_get_be16(&gb);
3407
3408             for (j = 0; j < cnt; j++) {
3409                 // +2 for the nal size field
3410                 int nalsize = bytestream2_peek_be16(&gb) + 2;
3411                 if (bytestream2_get_bytes_left(&gb) < nalsize) {
3412                     av_log(s->avctx, AV_LOG_ERROR,
3413                            "Invalid NAL unit size in extradata.\n");
3414                     return AVERROR_INVALIDDATA;
3415                 }
3416
3417                 ret = decode_nal_units(s, gb.buffer, nalsize);
3418                 if (ret < 0) {
3419                     av_log(avctx, AV_LOG_ERROR,
3420                            "Decoding nal unit %d %d from hvcC failed\n",
3421                            type, i);
3422                     return ret;
3423                 }
3424                 bytestream2_skip(&gb, nalsize);
3425             }
3426         }
3427
3428         /* Now store right nal length size, that will be used to parse
3429          * all other nals */
3430         s->nal_length_size = nal_len_size;
3431     } else {
3432         s->is_nalff = 0;
3433         ret = decode_nal_units(s, avctx->extradata, avctx->extradata_size);
3434         if (ret < 0)
3435             return ret;
3436     }
3437
3438     /* export stream parameters from the first SPS */
3439     for (i = 0; i < FF_ARRAY_ELEMS(s->sps_list); i++) {
3440         if (s->sps_list[i]) {
3441             const HEVCSPS *sps = (const HEVCSPS*)s->sps_list[i]->data;
3442             export_stream_params(s->avctx, s, sps);
3443             break;
3444         }
3445     }
3446
3447     return 0;
3448 }
3449
3450 static av_cold int hevc_decode_init(AVCodecContext *avctx)
3451 {
3452     HEVCContext *s = avctx->priv_data;
3453     int ret;
3454
3455     ff_init_cabac_states();
3456
3457     avctx->internal->allocate_progress = 1;
3458
3459     ret = hevc_init_context(avctx);
3460     if (ret < 0)
3461         return ret;
3462
3463     s->enable_parallel_tiles = 0;
3464     s->picture_struct = 0;
3465
3466     if(avctx->active_thread_type & FF_THREAD_SLICE)
3467         s->threads_number = avctx->thread_count;
3468     else
3469         s->threads_number = 1;
3470
3471     if (avctx->extradata_size > 0 && avctx->extradata) {
3472         ret = hevc_decode_extradata(s);
3473         if (ret < 0) {
3474             hevc_decode_free(avctx);
3475             return ret;
3476         }
3477     }
3478
3479     if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
3480             s->threads_type = FF_THREAD_FRAME;
3481         else
3482             s->threads_type = FF_THREAD_SLICE;
3483
3484     return 0;
3485 }
3486
3487 static av_cold int hevc_init_thread_copy(AVCodecContext *avctx)
3488 {
3489     HEVCContext *s = avctx->priv_data;
3490     int ret;
3491
3492     memset(s, 0, sizeof(*s));
3493
3494     ret = hevc_init_context(avctx);
3495     if (ret < 0)
3496         return ret;
3497
3498     return 0;
3499 }
3500
3501 static void hevc_decode_flush(AVCodecContext *avctx)
3502 {
3503     HEVCContext *s = avctx->priv_data;
3504     ff_hevc_flush_dpb(s);
3505     s->max_ra = INT_MAX;
3506 }
3507
/* Helpers for the AVOption table below: byte offset of a field inside
 * HEVCContext, and the flag set shared by every option. */
#define OFFSET(x) offsetof(HEVCContext, x)
#define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
3510
/* Profile id / display name pairs advertised by the decoder; the list is
 * terminated by the FF_PROFILE_UNKNOWN entry. */
static const AVProfile profiles[] = {
    { FF_PROFILE_HEVC_MAIN,                 "Main"                },
    { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
    { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
    { FF_PROFILE_HEVC_REXT,                 "Rext"  },
    { FF_PROFILE_UNKNOWN },
};
3518
/* Private decoder options. Both entries are integers in the range 0..1
 * with default 0, and both store into HEVCContext.apply_defdispwin.
 * NOTE(review): "strict-displaywin" presumably exists as an alias of
 * "apply_defdispwin" -- confirm that sharing the field is intended. */
static const AVOption options[] = {
    { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
        AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
    { "strict-displaywin", "stricly apply default display window size", OFFSET(apply_defdispwin),
        AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
    { NULL },
};
3526
/* AVClass that exposes the option table above; referenced by
 * ff_hevc_decoder.priv_class. */
static const AVClass hevc_decoder_class = {
    .class_name = "HEVC decoder",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};
3533
/* Codec registration entry: binds the callbacks defined in this file to
 * AV_CODEC_ID_HEVC and declares slice- and frame-threading capability. */
AVCodec ff_hevc_decoder = {
    .name                  = "hevc",
    .long_name             = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_HEVC,
    .priv_data_size        = sizeof(HEVCContext),
    .priv_class            = &hevc_decoder_class,
    .init                  = hevc_decode_init,
    .close                 = hevc_decode_free,
    .decode                = hevc_decode_frame,
    .flush                 = hevc_decode_flush,
    /* frame-threading hooks */
    .update_thread_context = hevc_update_thread_context,
    .init_thread_copy      = hevc_init_thread_copy,
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
                             CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
    .profiles              = NULL_IF_CONFIG_SMALL(profiles),
};